From 14239face2d1b8cfb1791e1fe34228f15847be28 Mon Sep 17 00:00:00 2001 From: AlladaSravani Date: Thu, 14 Nov 2024 21:22:41 +0530 Subject: [PATCH] Update Project_Outline.ipynb --- Project_Outline.ipynb | 107 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 106 insertions(+), 1 deletion(-) diff --git a/Project_Outline.ipynb b/Project_Outline.ipynb index e47f144..0cb511f 100644 --- a/Project_Outline.ipynb +++ b/Project_Outline.ipynb @@ -1 +1,106 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Project Outline.ipynb","provenance":[],"authorship_tag":"ABX9TyPZl4d0nA5Qmq8X1mDqSb1O"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["# **Title of Project**"],"metadata":{"id":"dqZ-nhxiganh"}},{"cell_type":"markdown","source":["-------------"],"metadata":{"id":"gScHkw6jjrLo"}},{"cell_type":"markdown","source":["## **Objective**"],"metadata":{"id":"Xns_rCdhh-vZ"}},{"cell_type":"markdown","source":[""],"metadata":{"id":"9sPvnFM1iI9l"}},{"cell_type":"markdown","source":["## **Data Source**"],"metadata":{"id":"-Vbnt9CciKJP"}},{"cell_type":"markdown","source":[""],"metadata":{"id":"sGcv5WqQiNyl"}},{"cell_type":"markdown","source":["## **Import Library**"],"metadata":{"id":"r7GrZzX0iTlV"}},{"cell_type":"code","source":[""],"metadata":{"id":"UkK6NH9DiW-X"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## **Import Data**"],"metadata":{"id":"9lHPQj1XiOUc"}},{"cell_type":"code","source":[""],"metadata":{"id":"zcU1fdnGho6M"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## **Describe Data**"],"metadata":{"id":"7PUnimBoiX-x"}},{"cell_type":"code","source":[""],"metadata":{"id":"kG15arusiZ8Z"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## **Data 
Visualization**"],"metadata":{"id":"oBGX4Ekniriz"}},{"cell_type":"code","source":[""],"metadata":{"id":"lW-OIRK0iuzO"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## **Data Preprocessing**"],"metadata":{"id":"UqfyPOCYiiww"}},{"cell_type":"code","source":[""],"metadata":{"id":"3cyr3fbGin0A"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## **Define Target Variable (y) and Feature Variables (X)**"],"metadata":{"id":"2jXJpdAuiwYW"}},{"cell_type":"code","source":[""],"metadata":{"id":"QBCakTuli57t"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## **Train Test Split**"],"metadata":{"id":"90_0q_Pbi658"}},{"cell_type":"code","source":[""],"metadata":{"id":"u60YYaOFi-Dw"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## **Modeling**"],"metadata":{"id":"cIhyseNria7W"}},{"cell_type":"code","source":[""],"metadata":{"id":"Toq58wpkjCw7"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## **Model Evaluation**"],"metadata":{"id":"vhAwWfG0jFun"}},{"cell_type":"code","source":[""],"metadata":{"id":"lND3jJj_jhx4"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## **Prediction**"],"metadata":{"id":"8AzwG7oLjiQI"}},{"cell_type":"code","source":[""],"metadata":{"id":"JLebGzDJjknA"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## **Explaination**"],"metadata":{"id":"SBo38CJZjlEX"}},{"cell_type":"markdown","source":[""],"metadata":{"id":"Ybi8FR9Kjv00"}}]} \ No newline at end of file +Project Title: Mileage Prediction using Regression Analysis + +Objective: + +1. Develop a model to predict the mileage of a vehicle based on its attributes. +2. Analyze the relationship between the attributes and mileage. +3. Evaluate the performance of the model. 
# Dataset:
#
# - Auto MPG Dataset (available on Kaggle)
# - Attributes:
#     - Cylinders (cyl)
#     - Displacement (disp)
#     - Horsepower (hp)
#     - Weight (wt)
#     - Acceleration (acc)
#     - Model Year (year)
#     - Origin (origin)
# - Target variable: Mileage (mpg)
#
# Methodology:
#
# 1. Data Preprocessing:
#     - Handling missing values
#     - Normalization
# 2. Feature Selection:
#     - Correlation analysis
#     - Recursive feature elimination
# 3. Regression Analysis:
#     - Simple Linear Regression (SLR)
#     - Multiple Linear Regression (MLR)
#     - Polynomial Regression
#     - Ridge Regression
#     - Lasso Regression
# 4. Model Evaluation:
#     - Mean Squared Error (MSE)
#     - Mean Absolute Error (MAE)
#     - Coefficient of Determination (R-squared)
#
# Code:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline

# Configuration: everything a reader might want to tune lives here.
DATA_PATH = 'auto-mpg.csv'
TARGET = 'mpg'
TEST_SIZE = 0.2
SEED = 42


def report_metrics(label, y_true, y_pred):
    """Print MSE, MAE and R-squared for one model's predictions.

    Parameters
    ----------
    label : str
        Prefix printed before each metric name (e.g. 'SLR').
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted target values, aligned with ``y_true``.
    """
    print(f'{label} MSE:', mean_squared_error(y_true, y_pred))
    print(f'{label} MAE:', mean_absolute_error(y_true, y_pred))
    print(f'{label} R-squared:', r2_score(y_true, y_pred))


# Load dataset
# NOTE(review): some distributions of Auto MPG reportedly mark missing
# horsepower with '?'; reading it as NaN lets dropna() below remove those
# rows. Harmless if the file contains no '?' -- confirm against the CSV.
raw_df = pd.read_csv(DATA_PATH, na_values='?')

# Preprocess data
df = raw_df.dropna()
# One-hot encode 'origin'. get_dummies yields one column per category, so the
# result cannot be assigned back into the single 'origin' column; let
# get_dummies replace that column in the frame instead.
df = pd.get_dummies(df, columns=['origin'], dtype=float)

# Split data
features = df.drop(columns=TARGET)
# LinearRegression needs numeric input, so leave out any text columns
# (presumably a car-name column in this dataset -- TODO confirm) and say so.
X = features.select_dtypes(include='number')
dropped_columns = [col for col in features.columns if col not in X.columns]
if dropped_columns:
    print('Dropping non-numeric columns:', dropped_columns)
y = df[TARGET]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# Simple Linear Regression (SLR)
# NOTE(review): assumes the CSV header uses the short name 'cyl' listed in the
# Dataset section; verify against the actual file before running.
slr = LinearRegression()
slr.fit(X_train[['cyl']], y_train)
y_pred_slr = slr.predict(X_test[['cyl']])
report_metrics('SLR', y_test, y_pred_slr)

# Multiple Linear Regression (MLR)
mlr = LinearRegression()
mlr.fit(X_train, y_train)
y_pred_mlr = mlr.predict(X_test)
report_metrics('MLR', y_test, y_pred_mlr)

# Polynomial Regression
poly = PolynomialFeatures(degree=2)
# Fit the feature expansion on the training split only, then reuse it for
# the test split.
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)
pr = LinearRegression()
pr.fit(X_train_poly, y_train)
y_pred_pr = pr.predict(X_test_poly)
report_metrics('Polynomial Regression', y_test, y_pred_pr)

# Challenges:
#
# 1. Feature selection
# 2. Model selection
# 3. Overfitting
#
# Future Work:
#
# 1. Incorporate additional features (e.g., engine type, transmission type)
# 2. Use more advanced regression techniques (e.g., gradient boosting, random forest)
# 3. Develop a web interface for user input and model prediction
#
# References:
#
# 1. "Auto MPG Dataset" by UCI Machine Learning Repository
# 2. "Regression Analysis" by Wikipedia
# 3. "Polynomial Regression" by Scikit-learn Documentation