From c75b9cd998816f0ade3efe73468a25e66fa99a78 Mon Sep 17 00:00:00 2001 From: Seth Reno Date: Sat, 14 Dec 2024 11:04:49 -0600 Subject: [PATCH 1/5] Make gradient_descent.py work out of the box --- .gitignore | 3 + README.md | 20 ++++- data.txt | 202 +++++++++++++++++++++++++++++++++++++++++++- gradient_descent.py | 41 +++++++-- requirements.txt | 56 ++++++++++++ 5 files changed, 309 insertions(+), 13 deletions(-) create mode 100644 .gitignore create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f6ed2f4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.vscode/ +.venv/ +output/ \ No newline at end of file diff --git a/README.md b/README.md index 2f171e1..8a318de 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,20 @@ # The Hundred-Page Machine Learning Book The Python code to reproduce the illustrations from [The Hundred-Page Machine Learning Book](http://themlbook.com/). -![](http://themlbook.com/images/og-image3.png) - -**WARNING!** To avoid buying counterfeit on Amazon, click on **[See All Buying Options](https://www.amazon.com/gp/offer-listing/199957950X/)** and choose "Amazon.com" and not a third-party seller. +# Getting Started +1. Create a virtual environment + ```bash + py -m venv .venv + ``` +2. Activate the virtual environment + ```bash + .venv/Scripts/activate + ``` +3. Install dependencies + ```bash + pip install -r requirements.txt + ``` +4. Run one of the examples + ``` + py gradient_descent.py + ``` diff --git a/data.txt b/data.txt index 448916c..9547f43 100644 --- a/data.txt +++ b/data.txt @@ -1 +1,201 @@ -The dataset for gradient descent example can be downloaded from: http://themlbook.com/wiki/doku.php?id=gradient_descent +,TV,radio,newspaper,sales +1,230.1,37.8,69.2,22.1 +2,44.5,39.3,45.1,10.4 +3,17.2,45.9,69.3,9.3 +4,151.5,41.3,58.5,18.5 +5,180.8,10.8,58.4,12.9 +6,8.7,48.9,75,7.2 +7,57.5,32.8,23.5,11.8 +8,120.2,19.6,11.6,13.2 +9,8.6,2.1,1,4.8 +10,199.8,2.6,21.2,10.6 +11,66.1,5.8,24.2,8.6 +12,214.7,24,4,17.4 +13,23.8,35.1,65.9,9.2 +14,97.5,7.6,7.2,9.7 +15,204.1,32.9,46,19 +16,195.4,47.7,52.9,22.4 +17,67.8,36.6,114,12.5 +18,281.4,39.6,55.8,24.4 +19,69.2,20.5,18.3,11.3 +20,147.3,23.9,19.1,14.6 +21,218.4,27.7,53.4,18 +22,237.4,5.1,23.5,12.5 +23,13.2,15.9,49.6,5.6 +24,228.3,16.9,26.2,15.5 +25,62.3,12.6,18.3,9.7 +26,262.9,3.5,19.5,12 +27,142.9,29.3,12.6,15 +28,240.1,16.7,22.9,15.9 +29,248.8,27.1,22.9,18.9 +30,70.6,16,40.8,10.5 +31,292.9,28.3,43.2,21.4 +32,112.9,17.4,38.6,11.9 +33,97.2,1.5,30,9.6 +34,265.6,20,0.3,17.4 +35,95.7,1.4,7.4,9.5 +36,290.7,4.1,8.5,12.8 +37,266.9,43.8,5,25.4 +38,74.7,49.4,45.7,14.7 +39,43.1,26.7,35.1,10.1 +40,228,37.7,32,21.5 +41,202.5,22.3,31.6,16.6 +42,177,33.4,38.7,17.1 +43,293.6,27.7,1.8,20.7 +44,206.9,8.4,26.4,12.9 +45,25.1,25.7,43.3,8.5 +46,175.1,22.5,31.5,14.9 +47,89.7,9.9,35.7,10.6 +48,239.9,41.5,18.5,23.2 +49,227.2,15.8,49.9,14.8 +50,66.9,11.7,36.8,9.7 +51,199.8,3.1,34.6,11.4 +52,100.4,9.6,3.6,10.7 +53,216.4,41.7,39.6,22.6 +54,182.6,46.2,58.7,21.2 +55,262.7,28.8,15.9,20.2 +56,198.9,49.4,60,23.7 +57,7.3,28.1,41.4,5.5 +58,136.2,19.2,16.6,13.2 +59,210.8,49.6,37.7,23.8 +60,210.7,29.5,9.3,18.4 +61,53.5,2,21.4,8.1 +62,261.3,42.7,54.7,24.2 +63,239.3,15.5,27.3,15.7 +64,102.7,29.6,8.4,14 +65,131.1,42.8,28.9,18 +66,69,9.3,0.9,9.3 +67,31.5,24.6,2.2,9.5 +68,139.3,14.5,10.2,13.4 +69,237.4,27.5,11,18.9 +70,216.8,43.9,27.2,22.3 +71,199.1,30.6,38.7,18.3 +72,109.8,14.3,31.7,12.4 +73,26.8,33,19.3,8.8 +74,129.4,5.7,31.3,11 +75,213.4,24.6,13.1,17 +76,16.9,43.7,89.4,8.7 +77,27.5,1.6,20.7,6.9 +78,120.5,28.5,14.2,14.2 +79,5.4,29.9,9.4,5.3 +80,116,7.7,23.1,11 +81,76.4,26.7,22.3,11.8 +82,239.8,4.1,36.9,12.3 +83,75.3,20.3,32.5,11.3 +84,68.4,44.5,35.6,13.6 +85,213.5,43,33.8,21.7 +86,193.2,18.4,65.7,15.2 +87,76.3,27.5,16,12 +88,110.7,40.6,63.2,16 +89,88.3,25.5,73.4,12.9 +90,109.8,47.8,51.4,16.7 +91,134.3,4.9,9.3,11.2 +92,28.6,1.5,33,7.3 +93,217.7,33.5,59,19.4 +94,250.9,36.5,72.3,22.2 +95,107.4,14,10.9,11.5 +96,163.3,31.6,52.9,16.9 +97,197.6,3.5,5.9,11.7 +98,184.9,21,22,15.5 +99,289.7,42.3,51.2,25.4 +100,135.2,41.7,45.9,17.2 +101,222.4,4.3,49.8,11.7 +102,296.4,36.3,100.9,23.8 +103,280.2,10.1,21.4,14.8 +104,187.9,17.2,17.9,14.7 +105,238.2,34.3,5.3,20.7 +106,137.9,46.4,59,19.2 +107,25,11,29.7,7.2 +108,90.4,0.3,23.2,8.7 +109,13.1,0.4,25.6,5.3 +110,255.4,26.9,5.5,19.8 +111,225.8,8.2,56.5,13.4 +112,241.7,38,23.2,21.8 +113,175.7,15.4,2.4,14.1 +114,209.6,20.6,10.7,15.9 +115,78.2,46.8,34.5,14.6 +116,75.1,35,52.7,12.6 +117,139.2,14.3,25.6,12.2 +118,76.4,0.8,14.8,9.4 +119,125.7,36.9,79.2,15.9 +120,19.4,16,22.3,6.6 +121,141.3,26.8,46.2,15.5 +122,18.8,21.7,50.4,7 +123,224,2.4,15.6,11.6 +124,123.1,34.6,12.4,15.2 +125,229.5,32.3,74.2,19.7 +126,87.2,11.8,25.9,10.6 +127,7.8,38.9,50.6,6.6 +128,80.2,0,9.2,8.8 +129,220.3,49,3.2,24.7 +130,59.6,12,43.1,9.7 +131,0.7,39.6,8.7,1.6 +132,265.2,2.9,43,12.7 +133,8.4,27.2,2.1,5.7 +134,219.8,33.5,45.1,19.6 +135,36.9,38.6,65.6,10.8 +136,48.3,47,8.5,11.6 +137,25.6,39,9.3,9.5 +138,273.7,28.9,59.7,20.8 +139,43,25.9,20.5,9.6 +140,184.9,43.9,1.7,20.7 +141,73.4,17,12.9,10.9 +142,193.7,35.4,75.6,19.2 +143,220.5,33.2,37.9,20.1 +144,104.6,5.7,34.4,10.4 +145,96.2,14.8,38.9,11.4 +146,140.3,1.9,9,10.3 +147,240.1,7.3,8.7,13.2 +148,243.2,49,44.3,25.4 +149,38,40.3,11.9,10.9 +150,44.7,25.8,20.6,10.1 +151,280.7,13.9,37,16.1 +152,121,8.4,48.7,11.6 +153,197.6,23.3,14.2,16.6 +154,171.3,39.7,37.7,19 +155,187.8,21.1,9.5,15.6 +156,4.1,11.6,5.7,3.2 +157,93.9,43.5,50.5,15.3 +158,149.8,1.3,24.3,10.1 +159,11.7,36.9,45.2,7.3 +160,131.7,18.4,34.6,12.9 +161,172.5,18.1,30.7,14.4 +162,85.7,35.8,49.3,13.3 +163,188.4,18.1,25.6,14.9 +164,163.5,36.8,7.4,18 +165,117.2,14.7,5.4,11.9 +166,234.5,3.4,84.8,11.9 +167,17.9,37.6,21.6,8 +168,206.8,5.2,19.4,12.2 +169,215.4,23.6,57.6,17.1 +170,284.3,10.6,6.4,15 +171,50,11.6,18.4,8.4 +172,164.5,20.9,47.4,14.5 +173,19.6,20.1,17,7.6 +174,168.4,7.1,12.8,11.7 +175,222.4,3.4,13.1,11.5 +176,276.9,48.9,41.8,27 +177,248.4,30.2,20.3,20.2 +178,170.2,7.8,35.2,11.7 +179,276.7,2.3,23.7,11.8 +180,165.6,10,17.6,12.6 +181,156.6,2.6,8.3,10.5 +182,218.5,5.4,27.4,12.2 +183,56.2,5.7,29.7,8.7 +184,287.6,43,71.8,26.2 +185,253.8,21.3,30,17.6 +186,205,45.1,19.6,22.6 +187,139.5,2.1,26.6,10.3 +188,191.1,28.7,18.2,17.3 +189,286,13.9,3.7,15.9 +190,18.7,12.1,23.4,6.7 +191,39.5,41.1,5.8,10.8 +192,75.5,10.8,6,9.9 +193,17.2,4.1,31.6,5.9 +194,166.8,42,3.6,19.6 +195,149.7,35.6,6,17.3 +196,38.2,3.7,13.8,7.6 +197,94.2,4.9,8.1,9.7 +198,177,9.3,6.4,12.8 +199,283.6,42,66.2,25.5 +200,232.1,8.6,8.7,13.4 diff --git a/gradient_descent.py b/gradient_descent.py index 254ebfa..8d2fd39 100644 --- a/gradient_descent.py +++ b/gradient_descent.py @@ -1,15 +1,30 @@ from __future__ import print_function import numpy as np import matplotlib.pyplot as plt +import pathlib import matplotlib matplotlib.rcParams['mathtext.fontset'] = 'stix' matplotlib.rcParams['font.family'] = 'STIXGeneral' matplotlib.rcParams.update({'font.size': 18}) +# constants for columns in data.txt +TV_COL=1; +RADIO_COL=2; +PAPER_COL=3; +SALES_COL=4 + +# platform agnostic path to the output directory +output_dir = pathlib.Path.cwd() / 'output' def plot_original_data(): - x, y = np.loadtxt("data.txt", delimiter= "\t", unpack = True) + x, y = np.loadtxt( + "data.txt", + skiprows=1, + usecols=(RADIO_COL, SALES_COL), + delimiter= ",", + unpack = True, + ) plt.scatter(x, y, color='#1f77b4', marker='o') @@ -19,9 +34,9 @@ def plot_original_data(): #plt.show() fig1 = plt.gcf() fig1.subplots_adjust(top = 0.98, bottom = 0.1, right = 0.98, left = 0.08, hspace = 0, wspace = 0) - fig1.savefig('../../Illustrations/gradient_descent-1.eps', format='eps', dpi=1000, bbox_inches = 'tight', pad_inches = 0) - fig1.savefig('../../Illustrations/gradient_descent-1.pdf', format='pdf', dpi=1000, bbox_inches = 'tight', pad_inches = 0) - fig1.savefig('../../Illustrations/gradient_descent-1.png', dpi=1000, bbox_inches = 'tight', pad_inches = 0) + fig1.savefig(output_dir / 'gradient_descent-1.eps', format='eps', dpi=1000, bbox_inches = 'tight', pad_inches = 0) + fig1.savefig(output_dir / 'gradient_descent-1.pdf', format='pdf', dpi=1000, bbox_inches = 'tight', pad_inches = 0) + fig1.savefig(output_dir / 'gradient_descent-1.png', dpi=1000, bbox_inches = 'tight', pad_inches = 0) def update_w_and_b(spendings, sales, w, b, alpha): dr_dw = 0.0 @@ -57,9 +72,9 @@ def train(spendings, sales, w, b, alpha, epochs): #plt.show() fig1 = plt.gcf() fig1.subplots_adjust(top = 0.98, bottom = 0.1, right = 0.98, left = 0.08, hspace = 0, wspace = 0) - fig1.savefig('../../Illustrations/gradient_descent-' + str(image_counter) + '.eps', format='eps', dpi=1000, bbox_inches = 'tight', pad_inches = 0) - fig1.savefig('../../Illustrations/gradient_descent-' + str(image_counter) + '.pdf', format='pdf', dpi=1000, bbox_inches = 'tight', pad_inches = 0) - fig1.savefig('../../Illustrations/gradient_descent-' + str(image_counter) + '.png', dpi=1000, bbox_inches = 'tight', pad_inches = 0) + fig1.savefig(output_dir / ('gradient_descent-' + str(image_counter) + '.eps'), format='eps', dpi=1000, bbox_inches = 'tight', pad_inches = 0) + fig1.savefig(output_dir / ('gradient_descent-' + str(image_counter) + '.pdf'), format='pdf', dpi=1000, bbox_inches = 'tight', pad_inches = 0) + fig1.savefig(output_dir / ('gradient_descent-' + str(image_counter) + '.png'), dpi=1000, bbox_inches = 'tight', pad_inches = 0) image_counter += 1 return w, b @@ -70,11 +85,19 @@ def loss(spendings, sales, w, b): total_error += (sales[i] - (w*spendings[i] + b))**2 return total_error / N -x, y = np.loadtxt("data.txt", delimiter= "\t", unpack = True) -#w, b = train(x, y, 0.0, 0.0, 0.001, 15000) +x, y = np.loadtxt( + "data.txt", + skiprows=1, + usecols=(RADIO_COL, SALES_COL), + delimiter= ",", + unpack = True, +) plot_original_data() +w, b = train(x, y, 0.0, 0.0, 0.001, 15000) + + def predict(x, w, b): return w*x + b x_new = 23.0 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..62e3d2e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,56 @@ +absl-py==2.1.0 +astunparse==1.6.3 +certifi==2024.12.14 +charset-normalizer==3.4.0 +colorama==0.4.6 +contourpy==1.3.1 +cycler==0.12.1 +flatbuffers==24.3.25 +fonttools==4.55.3 +gast==0.6.0 +google-pasta==0.2.0 +grpcio==1.68.1 +h5py==3.12.1 +idna==3.10 +joblib==1.4.2 +keras==3.7.0 +kiwisolver==1.4.7 +libclang==18.1.1 +llvmlite==0.43.0 +Markdown==3.7 +markdown-it-py==3.0.0 +MarkupSafe==3.0.2 +matplotlib==3.10.0 +mdurl==0.1.2 +ml-dtypes==0.4.1 +namex==0.0.8 +numba==0.60.0 +numpy==2.0.2 +opt_einsum==3.4.0 +optree==0.13.1 +packaging==24.2 +pillow==11.0.0 +protobuf==5.29.1 +Pygments==2.18.0 +pynndescent==0.5.13 +pyparsing==3.2.0 +python-dateutil==2.9.0.post0 +requests==2.32.3 +rich==13.9.4 +scikit-learn==1.6.0 +scipy==1.14.1 +setuptools==75.6.0 +six==1.17.0 +tensorboard==2.18.0 +tensorboard-data-server==0.7.2 +tensorflow==2.18.0 +tensorflow_intel==2.18.0 +termcolor==2.5.0 +threadpoolctl==3.5.0 +tqdm==4.67.1 +typing_extensions==4.12.2 +umap-learn==0.5.7 +urllib3==2.2.3 +Werkzeug==3.1.3 +wheel==0.45.1 +wrapt==1.17.0 From 006dbb26411e4b13f4cb219f4a3306007239a5fe Mon Sep 17 00:00:00 2001 From: Seth Reno Date: Sat, 14 Dec 2024 11:21:18 -0600 Subject: [PATCH 2/5] Prep for PR Removed some of the more opinionated changes I made so the PR is less obtrusive. --- .gitignore | 2 +- README.md | 4 ++++ gradient_descent.py | 4 ++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index f6ed2f4..ff72100 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ .vscode/ .venv/ -output/ \ No newline at end of file +Illustrations/ \ No newline at end of file diff --git a/README.md b/README.md index 8a318de..084d378 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,10 @@ # The Hundred-Page Machine Learning Book The Python code to reproduce the illustrations from [The Hundred-Page Machine Learning Book](http://themlbook.com/). +![](http://themlbook.com/images/og-image3.png) + +**WARNING!** To avoid buying counterfeit on Amazon, click on **[See All Buying Options](https://www.amazon.com/gp/offer-listing/199957950X/)** and choose "Amazon.com" and not a third-party seller. + # Getting Started 1. Create a virtual environment ```bash diff --git a/gradient_descent.py b/gradient_descent.py index 8d2fd39..6aeed3f 100644 --- a/gradient_descent.py +++ b/gradient_descent.py @@ -15,7 +15,8 @@ SALES_COL=4 # platform agnostic path to the output directory -output_dir = pathlib.Path.cwd() / 'output' +output_dir = pathlib.Path.cwd() / 'Illustrations' +output_dir.mkdir(parents=True, exist_ok=True) def plot_original_data(): x, y = np.loadtxt( @@ -97,7 +98,6 @@ def loss(spendings, sales, w, b): w, b = train(x, y, 0.0, 0.0, 0.001, 15000) - def predict(x, w, b): return w*x + b x_new = 23.0 From 01daf74f82b26a5b9486ec31302aba884e714e37 Mon Sep 17 00:00:00 2001 From: Seth Reno Date: Sat, 14 Dec 2024 11:26:09 -0600 Subject: [PATCH 3/5] Move function call back --- gradient_descent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gradient_descent.py b/gradient_descent.py index 6aeed3f..f4d8838 100644 --- a/gradient_descent.py +++ b/gradient_descent.py @@ -94,10 +94,10 @@ def loss(spendings, sales, w, b): unpack = True, ) -plot_original_data() - w, b = train(x, y, 0.0, 0.0, 0.001, 15000) +plot_original_data() + def predict(x, w, b): return w*x + b x_new = 23.0 From 777caf58341566ef98bca8a634880387d354a327 Mon Sep 17 00:00:00 2001 From: Seth Reno Date: Sat, 14 Dec 2024 17:00:57 -0600 Subject: [PATCH 4/5] Add sklearn version of gradient descent --- gradient_descent.py | 49 ++++++++++++++++++++++--------------- gradient_descent_sklearn.py | 24 ++++++++++++++++++ 2 files changed, 53 insertions(+), 20 deletions(-) create mode 100644 gradient_descent_sklearn.py diff --git a/gradient_descent.py b/gradient_descent.py index f4d8838..5efd68b 100644 --- a/gradient_descent.py +++ b/gradient_descent.py @@ -2,12 +2,17 @@ import numpy as np import matplotlib.pyplot as plt import pathlib - import matplotlib +import sys + matplotlib.rcParams['mathtext.fontset'] = 'stix' matplotlib.rcParams['font.family'] = 'STIXGeneral' matplotlib.rcParams.update({'font.size': 18}) +quiet = False +if len(sys.argv) > 1 and sys.argv[1] == 'quiet': + quiet = True + # constants for columns in data.txt TV_COL=1; RADIO_COL=2; @@ -59,24 +64,26 @@ def train(spendings, sales, w, b, alpha, epochs): for e in range(epochs): w, b = update_w_and_b(spendings, sales, w, b, alpha) - # log the progress - if (e == 0) or (e < 3000 and e % 400 == 0) or (e % 3000 == 0): - print("epoch: ", str(e), "loss: "+str(loss(spendings, sales, w, b))) - print("w, b: ", w, b) - plt.figure(image_counter) - axes = plt.gca() - axes.set_xlim([0,50]) - axes.set_ylim([0,30]) - plt.scatter(spendings, sales) - X_plot = np.linspace(0,50,50) - plt.plot(X_plot, X_plot*w + b) - #plt.show() - fig1 = plt.gcf() - fig1.subplots_adjust(top = 0.98, bottom = 0.1, right = 0.98, left = 0.08, hspace = 0, wspace = 0) - fig1.savefig(output_dir / ('gradient_descent-' + str(image_counter) + '.eps'), format='eps', dpi=1000, bbox_inches = 'tight', pad_inches = 0) - fig1.savefig(output_dir / ('gradient_descent-' + str(image_counter) + '.pdf'), format='pdf', dpi=1000, bbox_inches = 'tight', pad_inches = 0) - fig1.savefig(output_dir / ('gradient_descent-' + str(image_counter) + '.png'), dpi=1000, bbox_inches = 'tight', pad_inches = 0) - image_counter += 1 + if not quiet: + # log the progress + if (e == 0) or (e < 3000 and e % 400 == 0) or (e % 3000 == 0): + print("epoch: ", str(e), "loss: "+str(loss(spendings, sales, w, b))) + print("w, b: ", w, b) + plt.figure(image_counter) + axes = plt.gca() + axes.set_xlim([0,50]) + axes.set_ylim([0,30]) + plt.scatter(spendings, sales) + X_plot = np.linspace(0,50,50) + plt.plot(X_plot, X_plot*w + b) + #plt.show() + fig1 = plt.gcf() + fig1.subplots_adjust(top = 0.98, bottom = 0.1, right = 0.98, left = 0.08, hspace = 0, wspace = 0) + fig1.savefig(output_dir / ('gradient_descent-' + str(image_counter) + '.eps'), format='eps', dpi=1000, bbox_inches = 'tight', pad_inches = 0) + fig1.savefig(output_dir / ('gradient_descent-' + str(image_counter) + '.pdf'), format='pdf', dpi=1000, bbox_inches = 'tight', pad_inches = 0) + fig1.savefig(output_dir / ('gradient_descent-' + str(image_counter) + '.png'), dpi=1000, bbox_inches = 'tight', pad_inches = 0) + image_counter += 1 + return w, b def loss(spendings, sales, w, b): @@ -96,10 +103,12 @@ def loss(spendings, sales, w, b): w, b = train(x, y, 0.0, 0.0, 0.001, 15000) -plot_original_data() +if not quiet: + plot_original_data() def predict(x, w, b): return w*x + b + x_new = 23.0 y_new = predict(x_new, w, b) print(y_new) diff --git a/gradient_descent_sklearn.py b/gradient_descent_sklearn.py new file mode 100644 index 0000000..c12d420 --- /dev/null +++ b/gradient_descent_sklearn.py @@ -0,0 +1,24 @@ +import numpy as np +from sklearn.linear_model import LinearRegression + +# constants for columns in data.txt +TV_COL = 1 +RADIO_COL = 2 +PAPER_COL = 3 +SALES_COL = 4 + +x, y = np.loadtxt( + "data.txt", + skiprows=1, + usecols=(RADIO_COL, SALES_COL), + delimiter=",", + unpack=True, +) + +# produces the same results as gradient_descent.py +# but it's 2.8x faster and requires less code +model = LinearRegression().fit(x.reshape(-1, 1), y) + +x_new = 23.0 +y_new = model.predict(np.array([[x_new]])) +print(y_new[0]) From 172256aa4a8f08ed89f7d8dfb32804671e896ec0 Mon Sep 17 00:00:00 2001 From: Seth Reno Date: Sat, 14 Dec 2024 17:04:38 -0600 Subject: [PATCH 5/5] Revert "Add sklearn version of gradient descent" This reverts commit 777caf58341566ef98bca8a634880387d354a327. --- gradient_descent.py | 49 +++++++++++++++---------------------- gradient_descent_sklearn.py | 24 ------------------ 2 files changed, 20 insertions(+), 53 deletions(-) delete mode 100644 gradient_descent_sklearn.py diff --git a/gradient_descent.py b/gradient_descent.py index 5efd68b..f4d8838 100644 --- a/gradient_descent.py +++ b/gradient_descent.py @@ -2,17 +2,12 @@ import numpy as np import matplotlib.pyplot as plt import pathlib -import matplotlib -import sys +import matplotlib matplotlib.rcParams['mathtext.fontset'] = 'stix' matplotlib.rcParams['font.family'] = 'STIXGeneral' matplotlib.rcParams.update({'font.size': 18}) -quiet = False -if len(sys.argv) > 1 and sys.argv[1] == 'quiet': - quiet = True - # constants for columns in data.txt TV_COL=1; RADIO_COL=2; @@ -64,26 +59,24 @@ def train(spendings, sales, w, b, alpha, epochs): for e in range(epochs): w, b = update_w_and_b(spendings, sales, w, b, alpha) - if not quiet: - # log the progress - if (e == 0) or (e < 3000 and e % 400 == 0) or (e % 3000 == 0): - print("epoch: ", str(e), "loss: "+str(loss(spendings, sales, w, b))) - print("w, b: ", w, b) - plt.figure(image_counter) - axes = plt.gca() - axes.set_xlim([0,50]) - axes.set_ylim([0,30]) - plt.scatter(spendings, sales) - X_plot = np.linspace(0,50,50) - plt.plot(X_plot, X_plot*w + b) - #plt.show() - fig1 = plt.gcf() - fig1.subplots_adjust(top = 0.98, bottom = 0.1, right = 0.98, left = 0.08, hspace = 0, wspace = 0) - fig1.savefig(output_dir / ('gradient_descent-' + str(image_counter) + '.eps'), format='eps', dpi=1000, bbox_inches = 'tight', pad_inches = 0) - fig1.savefig(output_dir / ('gradient_descent-' + str(image_counter) + '.pdf'), format='pdf', dpi=1000, bbox_inches = 'tight', pad_inches = 0) - fig1.savefig(output_dir / ('gradient_descent-' + str(image_counter) + '.png'), dpi=1000, bbox_inches = 'tight', pad_inches = 0) - image_counter += 1 - + # log the progress + if (e == 0) or (e < 3000 and e % 400 == 0) or (e % 3000 == 0): + print("epoch: ", str(e), "loss: "+str(loss(spendings, sales, w, b))) + print("w, b: ", w, b) + plt.figure(image_counter) + axes = plt.gca() + axes.set_xlim([0,50]) + axes.set_ylim([0,30]) + plt.scatter(spendings, sales) + X_plot = np.linspace(0,50,50) + plt.plot(X_plot, X_plot*w + b) + #plt.show() + fig1 = plt.gcf() + fig1.subplots_adjust(top = 0.98, bottom = 0.1, right = 0.98, left = 0.08, hspace = 0, wspace = 0) + fig1.savefig(output_dir / ('gradient_descent-' + str(image_counter) + '.eps'), format='eps', dpi=1000, bbox_inches = 'tight', pad_inches = 0) + fig1.savefig(output_dir / ('gradient_descent-' + str(image_counter) + '.pdf'), format='pdf', dpi=1000, bbox_inches = 'tight', pad_inches = 0) + fig1.savefig(output_dir / ('gradient_descent-' + str(image_counter) + '.png'), dpi=1000, bbox_inches = 'tight', pad_inches = 0) + image_counter += 1 return w, b def loss(spendings, sales, w, b): @@ -103,12 +96,10 @@ def loss(spendings, sales, w, b): w, b = train(x, y, 0.0, 0.0, 0.001, 15000) -if not quiet: - plot_original_data() +plot_original_data() def predict(x, w, b): return w*x + b - x_new = 23.0 y_new = predict(x_new, w, b) print(y_new) diff --git a/gradient_descent_sklearn.py b/gradient_descent_sklearn.py deleted file mode 100644 index c12d420..0000000 --- a/gradient_descent_sklearn.py +++ /dev/null @@ -1,24 +0,0 @@ -import numpy as np -from sklearn.linear_model import LinearRegression - -# constants for columns in data.txt -TV_COL = 1 -RADIO_COL = 2 -PAPER_COL = 3 -SALES_COL = 4 - -x, y = np.loadtxt( - "data.txt", - skiprows=1, - usecols=(RADIO_COL, SALES_COL), - delimiter=",", - unpack=True, -) - -# produces the same results as gradient_descent.py -# but it's 2.8x faster and requires less code -model = LinearRegression().fit(x.reshape(-1, 1), y) - -x_new = 23.0 -y_new = model.predict(np.array([[x_new]])) -print(y_new[0])