From b81724c952c16562b3c2191fcd33cec7013e1e71 Mon Sep 17 00:00:00 2001
From: Dominik Krzeminski
Date: Sun, 6 Feb 2022 13:51:12 +0000
Subject: [PATCH 1/4] lin reg script added

---
 snippets/linear_regression.py | 77 +++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 snippets/linear_regression.py

diff --git a/snippets/linear_regression.py b/snippets/linear_regression.py
new file mode 100644
index 0000000..76ee3ee
--- /dev/null
+++ b/snippets/linear_regression.py
@@ -0,0 +1,77 @@
+import numpy as np
+
+def compCostFunction(estim_y, true_y):
+    E = estim_y - true_y
+    C = (1 / (2 * m)) * np.sum(E ** 2)
+    return C
+
+def test_dimensions(x, y):
+    # check that x and y have the same number of samples
+    assert isinstance(x, np.ndarray), "Only works for arrays"
+    assert isinstance(y, np.ndarray), "Only works for arrays"
+    return x.shape[0] == y.shape[0]
+
+# To be deleted later
+# feature_1 = np.linspace(0, 2, num=100)
+
+X = np.random.randn(100,3) # feature matrix
+y = 1 + np.dot(X, [3.5, 4., -4]) # target vector
+
+m = np.shape(X)[0] # nr of samples
+n = np.shape(X)[1] # nr of features
+
+def iterativeLinearRegression(X, y, alpha=0.01):
+    """
+    Fit linear regression iteratively via gradient descent; return the estimated parameters and the cost history.
+    """
+    steps=500
+    X = np.concatenate((np.ones((m, 1)), X), axis=1)
+
+    W = np.random.randn(n + 1, )
+    
+    # stores the updates on the cost function
+    cost_history = []
+    # iterate until the maximum number of steps
+    for i in np.arange(steps): # begin the process
+        
+        y_estimated = X.dot(W)
+        
+        cost = compCostFunction(y_estimated, y)
+        # compute the gradient of the cost
+        E = y_estimated - y
+        gradient = (1 / m) * X.T.dot(E)
+        
+        W = W - alpha * gradient
+        if i % 10 == 0:
+            print(f"step: {i}\tcost: {cost}")
+        
+        cost_history.append(cost)
+        
+    return W, cost_history
+
+params, history = iterativeLinearRegression(X, y)
+
+# test 1
+print(params)
+print(history)
+
+import matplotlib.pyplot as plt
+plt.plot(history)
+plt.xlabel("steps")
+plt.show()
+
+# test 2
+
+X = np.random.randn(500,2) # feature matrix
+y = np.dot(X, [5, -1]) # target vector
+
+m = np.shape(X)[0] # nr of samples
+n = np.shape(X)[1] # nr of features
+
+params, history = iterativeLinearRegression(X, y)
+print(params)
+
+import matplotlib.pyplot as plt
+plt.plot(history)
+plt.xlabel("steps")
+plt.show()

From 153da389cb4425064dd150f782981f226ddd7802 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?=
Date: Thu, 10 Feb 2022 13:26:39 +0100
Subject: [PATCH 2/4] Remove trailing whitespace

---
 snippets/linear_regression.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/snippets/linear_regression.py b/snippets/linear_regression.py
index 76ee3ee..8b6e713 100644
--- a/snippets/linear_regression.py
+++ b/snippets/linear_regression.py
@@ -28,25 +28,25 @@ def iterativeLinearRegression(X, y, alpha=0.01):
     X = np.concatenate((np.ones((m, 1)), X), axis=1)
 
     W = np.random.randn(n + 1, )
-    
+
     # stores the updates on the cost function
     cost_history = []
     # iterate until the maximum number of steps
     for i in np.arange(steps): # begin the process
-        
+
         y_estimated = X.dot(W)
-        
+
         cost = compCostFunction(y_estimated, y)
         # compute the gradient of the cost
         E = y_estimated - y
         gradient = (1 / m) * X.T.dot(E)
-        
+
         W = W - alpha * gradient
         if i % 10 == 0:
             print(f"step: {i}\tcost: {cost}")
-        
+
         cost_history.append(cost)
-        
+
     return W, cost_history
 
 params, history = iterativeLinearRegression(X, y)

From bbc40d544e7f320c806930ded31d23f91d53e757 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?=
Date: Thu, 10 Feb 2022 13:30:29 +0100
Subject: [PATCH 3/4] Make global variable a param

---
 snippets/linear_regression.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/snippets/linear_regression.py b/snippets/linear_regression.py
index 8b6e713..2ccdc51 100644
--- a/snippets/linear_regression.py
+++ b/snippets/linear_regression.py
@@ -1,8 +1,8 @@
 import numpy as np
 
-def compCostFunction(estim_y, true_y):
+def compCostFunction(estim_y, true_y, nr_of_samples):
     E = estim_y - true_y
-    C = (1 / (2 * m)) * np.sum(E ** 2)
+    C = (1 / (2 * nr_of_samples)) * np.sum(E ** 2)
     return C
 
 def test_dimensions(x, y):
@@ -20,12 +20,12 @@ def test_dimensions(x, y):
 m = np.shape(X)[0] # nr of samples
 n = np.shape(X)[1] # nr of features
 
-def iterativeLinearRegression(X, y, alpha=0.01):
+def iterativeLinearRegression(X, y, nr_of_samples, alpha=0.01):
     """
     Fit linear regression iteratively via gradient descent; return the estimated parameters and the cost history.
     """
     steps=500
-    X = np.concatenate((np.ones((m, 1)), X), axis=1)
+    X = np.concatenate((np.ones((nr_of_samples, 1)), X), axis=1)
 
     W = np.random.randn(n + 1, )
 
@@ -36,10 +36,10 @@ def iterativeLinearRegression(X, y, nr_of_samples, alpha=0.01):
 
         y_estimated = X.dot(W)
 
-        cost = compCostFunction(y_estimated, y)
+        cost = compCostFunction(y_estimated, y, nr_of_samples)
         # compute the gradient of the cost
         E = y_estimated - y
-        gradient = (1 / m) * X.T.dot(E)
+        gradient = (1 / nr_of_samples) * X.T.dot(E)
 
         W = W - alpha * gradient
         if i % 10 == 0:
@@ -49,7 +49,7 @@ def iterativeLinearRegression(X, y, nr_of_samples, alpha=0.01):
     return W, cost_history
 
-params, history = iterativeLinearRegression(X, y)
+params, history = iterativeLinearRegression(X, y, m)
 
 # test 1
 print(params)
 print(history)
@@ -68,7 +68,7 @@ def iterativeLinearRegression(X, y, nr_of_samples, alpha=0.01):
 m = np.shape(X)[0] # nr of samples
 n = np.shape(X)[1] # nr of features
 
-params, history = iterativeLinearRegression(X, y)
+params, history = iterativeLinearRegression(X, y, m)
 print(params)
 
 import matplotlib.pyplot as plt

From f3d64b499efe14e7165b877ea5555554a4bbceb5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?=
Date: Thu, 10 Feb 2022 13:31:49 +0100
Subject: [PATCH 4/4] Use matrix multiplication symbol

---
 snippets/linear_regression.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/snippets/linear_regression.py b/snippets/linear_regression.py
index 2ccdc51..f08b051 100644
--- a/snippets/linear_regression.py
+++ b/snippets/linear_regression.py
@@ -15,7 +15,7 @@ def test_dimensions(x, y):
 # feature_1 = np.linspace(0, 2, num=100)
 
 X = np.random.randn(100,3) # feature matrix
-y = 1 + np.dot(X, [3.5, 4., -4]) # target vector
+y = 1 + X @ [3.5, 4., -4] # target vector
 
 m = np.shape(X)[0] # nr of samples
 n = np.shape(X)[1] # nr of features
@@ -34,12 +34,12 @@ def iterativeLinearRegression(X, y, nr_of_samples, alpha=0.01):
     # iterate until the maximum number of steps
     for i in np.arange(steps): # begin the process
 
-        y_estimated = X.dot(W)
+        y_estimated = X @ W
 
         cost = compCostFunction(y_estimated, y, nr_of_samples)
         # compute the gradient of the cost
         E = y_estimated - y
-        gradient = (1 / nr_of_samples) * X.T.dot(E)
+        gradient = (1 / nr_of_samples) * X.T @ E
 
         W = W - alpha * gradient
         if i % 10 == 0:
@@ -63,7 +63,7 @@ def iterativeLinearRegression(X, y, nr_of_samples, alpha=0.01):
 # test 2
 
 X = np.random.randn(500,2) # feature matrix
-y = np.dot(X, [5, -1]) # target vector
+y = X @ [5, -1] # target vector
 
 m = np.shape(X)[0] # nr of samples
 n = np.shape(X)[1] # nr of features
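
A quick sanity check for the final state of snippets/linear_regression.py (after PATCH 4/4): a minimal sketch, not part of the patch series, that compares the gradient-descent estimate with NumPy's closed-form least-squares solution on fresh synthetic data. It assumes iterativeLinearRegression is in scope and, like the script itself, that the global n (number of features) is set before the call; Xb and w_ref are illustrative names introduced here.

import numpy as np

# fresh synthetic data with known true weights [3.5, 4., -4] and intercept 1
X = np.random.randn(200, 3)
y = 1 + X @ [3.5, 4., -4]

m = np.shape(X)[0]  # nr of samples
n = np.shape(X)[1]  # nr of features (read as a global inside the function)

# gradient-descent estimate (the function prints its cost every 10 steps)
params, _ = iterativeLinearRegression(X, y, m)

# closed-form reference: least squares on X with an intercept column prepended
Xb = np.concatenate((np.ones((m, 1)), X), axis=1)
w_ref, *_ = np.linalg.lstsq(Xb, y, rcond=None)

# after 500 steps at alpha=0.01 the two estimates should roughly agree
print("gradient descent:", params)
print("least squares:   ", w_ref)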