ndvtechsyssolutions · ribkaaramalla322 · Jul 10, 2025
diff --git a/NDV_Code_By_RibkaA_Linear_Regression/LinearRegression.py b/NDV_Code_By_RibkaA_Linear_Regression/LinearRegression.py
@@ -0,0 +1,80 @@
+# Import necessary libraries
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.linear_model import LinearRegression
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import mean_squared_error, r2_score
+
+# Load dataset
+# If using CSV: df = pd.read_csv('Salary_Data.csv')
+# Else, create sample dataset manually
+data = {
+    'YearsExperience': [1.1, 1.3, 1.5, 2.0, 2.2, 2.9, 3.0, 3.2, 3.2, 3.7,
+                        3.9, 4.0, 4.0, 4.1, 4.5, 4.9, 5.1, 5.3, 5.9, 6.0,
+                        6.8, 7.1, 7.9, 8.2, 8.7, 9.0, 9.5, 9.6, 10.3, 10.5],
+    'Salary': [39343, 46205, 37731, 43525, 39891, 56642, 60150, 54445, 64445, 57189,
+               63218, 55794, 56957, 57081, 61111, 67938, 66029, 83088, 81363, 93940,
+               91738, 98273, 101302, 113812, 109431, 105582, 116969, 112635, 122391, 121872]
+}
+df = pd.DataFrame(data)
+
+# Display first 5 rows
+print("Dataset Preview:")
+print(df.head())
+
+# Visualize data with scatter plot
+plt.figure(figsize=(8, 5))
+sns.scatterplot(x='YearsExperience', y='Salary', data=df)
+plt.title("Years of Experience vs Salary")
+plt.xlabel("Years of Experience")
+plt.ylabel("Salary")
+plt.grid(True)
+plt.show()
+
+# Define features and target
+X = df[['YearsExperience']]
+y = df['Salary']
+
+# Split data into training and testing sets (80/20)
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+# Create and train Linear Regression model
+model = LinearRegression()
+model.fit(X_train, y_train)
+
+# Predict on test set
+y_pred = model.predict(X_test)
+
+# Evaluate model performance
+mse = mean_squared_error(y_test, y_pred)
+r2 = r2_score(y_test, y_pred)
+print(f"\nMean Squared Error: {mse:.2f}")
+print(f"R² Score: {r2:.2f}")
+
+# Plot regression line over scatter plot
+plt.figure(figsize=(8, 5))
+sns.scatterplot(x='YearsExperience', y='Salary', data=df, label='Actual')
+plt.plot(X_test, y_pred, color='red', linewidth=2, label='Predicted Line')
+plt.title("Linear Regression Fit")
+plt.xlabel("Years of Experience")
+plt.ylabel("Salary")
+plt.legend()
+plt.grid(True)
+plt.show()
+
+# Compare predictions vs actual values using bar chart
+comparison_df = pd.DataFrame({'Actual': y_test.values, 'Predicted': y_pred})
+comparison_df.plot(kind='bar', figsize=(10, 5))
+plt.title("Actual vs Predicted Salaries")
+plt.xlabel("Sample Index")
+plt.ylabel("Salary")
+plt.grid(True)
+plt.tight_layout()
+plt.show()
+
+# Optional: Predict salary for user input
+exp = float(input("Enter years of experience to predict salary: "))
+pred_salary = model.predict([[exp]])
+print(f"Predicted Salary for {exp} years of experience: ₹{pred_salary[0]:,.2f}")