From 54f706534845ccd3ae717f39593d51980b187fd5 Mon Sep 17 00:00:00 2001 From: Jordan Stomps Date: Mon, 31 Oct 2022 14:19:42 -0400 Subject: [PATCH 01/14] utility and testing scripts needed for implementing ML and SSML --- .github/workflows/python-package.yml | 2 +- README.md | 6 + requirements.txt | 7 + scripts/__init__.py | 0 scripts/utils.py | 328 ++++++++++++++++++ tests/test_BackgroundEstimator.py | 1 - tests/test_models.py | 491 +++++++++++++++++++++++++++ 7 files changed, 833 insertions(+), 2 deletions(-) create mode 100644 scripts/__init__.py create mode 100644 scripts/utils.py create mode 100644 tests/test_models.py diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index d88f9c7..973f71c 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -41,7 +41,7 @@ jobs: - name: Test with pytest run: | python3 -m pytest - python3 -m coverage run --source=./RadClass/ -m pytest + python3 -m coverage run --source=./RadClass/,./models/,./scripts/ -m pytest python3 -m coverage report python3 -m coverage html COVERALLS_REPO_TOKEN=${{ secrets.COVERALLS_REPO_TOKEN }} python3 -m coveralls --service=github diff --git a/README.md b/README.md index b08bd07..42245fa 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,13 @@ Versions 3.6-3.9 are currently supported by tests. The following Python packages * h5py * numpy * progressbar2 +* matplotlib +* seaborn * scipy +* sklearn +* hyperopt +* torch +* shadow-ssml Modules can be imported from the repository directory (e.g. `from RadClass.H0 import H0`) or `RadClass` can be installed using pip: diff --git a/requirements.txt b/requirements.txt index 06d1c3a..8b22315 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,10 @@ numpy h5py progressbar2 scipy>=1.7.0 +scikit-learn +hyperopt +matplotlib +seaborn +joblib +torch +shadow-ssml diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/utils.py b/scripts/utils.py new file mode 100644 index 0000000..d91c826 --- /dev/null +++ b/scripts/utils.py @@ -0,0 +1,328 @@ +import numpy as np +import seaborn as sns +import matplotlib.pyplot as plt +# For hyperopt (parameter optimization) +from hyperopt import Trials, tpe, fmin +from functools import partial +# diagnostics +from sklearn.metrics import confusion_matrix +# pca +from sklearn.preprocessing import StandardScaler +from sklearn.decomposition import PCA +# Cross Validation +from sklearn.model_selection import KFold, StratifiedKFold + + +class EarlyStopper: + ''' + Early stopping mechanism for neural networks. + Code adapted from user "isle_of_gods" from StackOverflow: + https://stackoverflow.com/questions/71998978/early-stopping-in-pytorch + Use this class to break a training loop if the validation loss is low. + Inputs: + patience: integer; forces stop if validation loss has not improved + for some time + min_delta: "fudge value" for how much loss to tolerate before stopping + ''' + + def __init__(self, patience=1, min_delta=0): + self.patience = patience + self.min_delta = min_delta + self.counter = 0 + self.min_validation_loss = np.inf + + def early_stop(self, validation_loss): + ''' + Tests for the early stopping condition if the validation loss + has not improved for a certain period of time (patience). 
+ Inputs: + validation_loss: typically a float value for the loss function of + a neural network training loop + ''' + + if validation_loss < self.min_validation_loss: + # keep track of the smallest validation loss + # if it has been beaten, restart patience + self.min_validation_loss = validation_loss + self.counter = 0 + elif validation_loss > (self.min_validation_loss + self.min_delta): + # keep track of whether validation loss has been decreasing + # by a tolerable amount + self.counter += 1 + if self.counter >= self.patience: + return True + return False + + +def run_hyperopt(space, model, data_dict, max_evals=50, verbose=True): + ''' + Runs hyperparameter optimization on a model given a parameter space. + Inputs: + space: dictionary with each hyperparameter as keys and values being the + range of parameter space (see hyperopt docs for defining a space) + mode: function that takes params dictionary, trains a specified ML model + and returns the optimization loss function, model, and other + attributes (e.g. accuracy on evaluation set) + max_eval: (int) run hyperparameter optimization for max_val iterations + verbose: report best and worse loss/accuracy + + Returns: + best: dictionary with returns from model function, including best loss, + best trained model, best parameters, etc. + worst: dictionary with returns from model function, including worst loss, + worst trained model, worst parameters, etc. + ''' + + trials = Trials() + + # wrap data into objective function + fmin_objective = partial(model, data_dict=data_dict) + + # run hyperopt + fmin(fmin_objective, + space, + algo=tpe.suggest, + max_evals=max_evals, + trials=trials) + + # of all trials, find best and worst loss/accuracy from optimization + best = trials.results[np.argmin([r['loss'] for r in trials.results])] + worst = trials.results[np.argmax([r['loss'] for r in trials.results])] + + if verbose: + print('best accuracy:', 1-best['loss']) + print('best params:', best['params']) + print('worst accuracy:', 1-worst['loss']) + print('worst params:', worst['params']) + + return best, worst + + +def cross_validation(model, X, y, params, n_splits=3, + stratified=False, random_state=None): + ''' + Perform K-Fold cross validation using sklearn and a given model. + The model *must* have a fresh_start method (see models in RadClass/models). + fresh_start() is used instead of train() to be agnostic to the data needed + for training (fresh_start requires a data_dict whereas each model's + train could take different combinations of labeled & unlabeled data). + This also avoids the need to do hyperparameter optimization (and + therefore many training epochs) for every K-Fold. + NOTE: fresh_start returns the model and results in a dictionary but + does not overwrite/save the model to the respective class. + You can manually overwrite using model.model = return.model + Hyperparameter optimization (model.optimize) can be done before or after + cross validation to specify the (optimal) parameters used by the model + since they are required here. + NOTE: Fixed default to shuffle data during cross validation splits. + (See sklearn cross validation docs for more info.) + NOTE: Unlabeled data, if provided, will always be included in the training + dataset. This means that this cross validation implementation is + susceptible to bias in the unlabeled data distribution. To test for + this bias, a user can manually run cross validation as a parent to + calling this function, splitting the unlabeled data and adding + different folds into X. 
+ Inputs: + model: ML model class object (e.g. RadClass/models). + Must have a fresh_start() method. + NOTE: If the model expects unlabeled data but unlabed data is not + provided in X/y, an error will likely be thrown when training the model + through fresh_start. + X: array of feature vectors (rows of individual instances, cols of vectors) + This should include all data for training and testing (since the + testing subset will be split by cross validation), including unlabeled + data if needed/used. + y: array/vector of labels for X. If including unlabeled data, use -1. + This should have the same order as X. That is, each row index in X + has an associated label with the same index in y. + params: dictionary of hyperparameters. Will depend on model used. + Alternatively, use model.params for models in RadClass/models + n_splits: int number of splits for K-Fold cross validation + stratified: bool; if True, balance the K-Folds to have roughly the same + proportion of samples from each class. + random_state: seed for reproducility. + ''' + + # return lists + accs = [] + reports = [] + + if stratified: + cv = StratifiedKFold(n_splits=n_splits, random_state=random_state, + shuffle=True) + else: + cv = KFold(n_splits=n_splits, random_state=random_state, + shuffle=True) + + # separate unlabeled data if included + Ux = None + Uy = None + if -1 in y: + U_idx = np.where(y == -1)[0] + L_idx = np.where(y != -1)[0] + Ux = X[U_idx] + Uy = y[U_idx] + Lx = X[L_idx] + Ly = y[L_idx] + else: + Lx = X + Ly = y + # conduct K-Fold cross validation + cv.get_n_splits(Lx, Ly) + for train_idx, test_idx in cv.split(Lx, Ly): + trainx, testx = Lx[train_idx], Lx[test_idx] + trainy, testy = Ly[train_idx], Ly[test_idx] + + # construct data dictionary for training in fresh_start + data_dict = {'trainx': trainx, 'trainy': trainy, + 'testx': testx, 'testy': testy} + if Ux is not None: + data_dict['Ux'] = Ux + data_dict['Uy'] = Uy + results = model.fresh_start(params, data_dict) + accs = np.append(accs, results['accuracy']) + reports = np.append(reports, results) + + # report cross validation results + print('Average accuracy:', np.mean(accs)) + print('Max accuracy:', np.max(accs)) + print('All accuracy:', accs) + # return the results of fresh_start for the max accuracy model + return reports[np.argmax(accs)] + + +def pca(Lx, Ly, Ux, Uy, filename): + ''' + A function for computing and plotting 2D PCA. + Inputs: + Lx: labeled feature data. + Ly: class labels for labeled data. + Ux: unlabeled feature data. + Uy: labels for unlabeled data (all labels should be -1). + filename: filename for saved plot. + The file must be saved with extension .joblib. + Added to filename if not included as input. 
+ ''' + + plt.rcParams.update({'font.size': 20}) + # only saving colors for binary classification with unlabeled instances + col_dict = {-1: 'tab:gray', 0: 'tab:orange', 1: 'tab:blue'} + + pcadata = np.append(Lx, Ux, axis=0) + normalizer = StandardScaler() + x = normalizer.fit_transform(pcadata) + print(np.mean(pcadata), np.std(pcadata)) + print(np.mean(x), np.std(x)) + + pca = PCA(n_components=2) + pca.fit_transform(x) + print(pca.explained_variance_ratio_) + print(pca.singular_values_) + print(pca.components_) + + principalComponents = pca.fit_transform(x) + + fig, ax = plt.subplots(figsize=(10, 8)) + ax.set_xlabel('Principal Component 1', fontsize=15) + ax.set_ylabel('Principal Component 2', fontsize=15) + for idx, color in col_dict.items(): + indices = np.where(np.append(Ly, Uy, axis=0) == idx)[0] + ax.scatter(principalComponents[indices, 0], + principalComponents[indices, 1], + c=color, + label='class '+str(idx)) + ax.grid() + ax.legend() + + if filename[-4:] != '.png': + filename += '.png' + fig.tight_layout() + fig.savefig(filename) + + +def multiD_pca(Lx, Ly, Ux, Uy, filename, n=2): + ''' + A function for computing and plotting n-dimensional PCA. + Inputs: + Lx: labeled feature data. + Ly: class labels for labeled data. + Ux: unlabeled feature data. + Uy: labels for unlabeled data (all labels should be -1). + filename: filename for saved plot. + The file must be saved with extension .joblib. + Added to filename if not included as input. + n: number of singular values to include in PCA analysis. + ''' + + plt.rcParams.update({'font.size': 20}) + # only saving colors for binary classification with unlabeled instances + col_dict = {-1: 'tab:gray', 0: 'tab:orange', 1: 'tab:blue'} + + pcadata = np.append(Lx, Ux, axis=0) + normalizer = StandardScaler() + x = normalizer.fit_transform(pcadata) + print(np.mean(pcadata), np.std(pcadata)) + print(np.mean(x), np.std(x)) + + n = 2 + pca = PCA(n_components=n) + principalComponents = pca.fit_transform(x) + print(pca.explained_variance_ratio_) + print(pca.singular_values_) + print(pca.components_) + + alph = ["A", "B", "C", "D", "E", "F", "G", "H", + "I", "J", "K", "L", "M", "N", "O", "P", + "Q", "R", "S", "T", "U", "V", "W", "X", + "Y", "Z"] + jobs = alph[:n] + + fig, axes = plt.subplots(n, n, figsize=(15, 15)) + + for row in range(axes.shape[0]): + for col in range(axes.shape[1]): + ax = axes[row, col] + if row == col: + ax.tick_params( + axis='both', which='both', + bottom='off', top='off', + labelbottom='off', + left='off', right='off', + labelleft='off' + ) + ax.text(0.5, 0.5, jobs[row], horizontalalignment='center') + else: + for idx, color in col_dict.items(): + indices = np.where(np.append(Ly, Uy, axis=0) == idx)[0] + ax.scatter(principalComponents[indices, row], + principalComponents[indices, col], + c=color, + label='class '+str(idx)) + fig.tight_layout() + if filename[-4:] != '.png': + filename += '.png' + fig.savefig(filename) + + +def plot_cf(testy, predy, title, filename): + ''' + Uses sklearn metric to compute a confusion matrix for visualization + Inputs: + testy: array/vector with ground-truth labels for test/evaluation set + predy: array/vector with predicted sample labels from trained model + title: string title for plot + filename: string with extension for confusion matrix file + ''' + + cf_matrix = confusion_matrix(testy, predy) + ax = sns.heatmap(cf_matrix, annot=True, cmap='Blues') + + ax.set_title(title) + ax.set_xlabel('\nPredicted Values') + ax.set_ylabel('Actual Values ') + + # Ticket labels - List must be in 
alphabetical order + ax.xaxis.set_ticklabels(['0(SNM)', '1(other)']) + ax.yaxis.set_ticklabels(['0(SNM)', '1(other)']) + # Save the visualization of the Confusion Matrix. + plt.savefig(filename) diff --git a/tests/test_BackgroundEstimator.py b/tests/test_BackgroundEstimator.py index 2d10c89..efc1299 100644 --- a/tests/test_BackgroundEstimator.py +++ b/tests/test_BackgroundEstimator.py @@ -77,7 +77,6 @@ def test_write(): bckg.write(ofilename=ofilename) results = np.loadtxt(fname=ofilename+'.csv', delimiter=',') - print(results) # the resulting observation should be: # counts * integration / live-time diff --git a/tests/test_models.py b/tests/test_models.py new file mode 100644 index 0000000..e3fb086 --- /dev/null +++ b/tests/test_models.py @@ -0,0 +1,491 @@ +# diagnostics +import numpy as np +from datetime import datetime, timedelta +# testing models +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler +import tests.test_data as test_data +# hyperopt +from hyperopt.pyll.base import scope +from hyperopt import hp +# testing utils +import scripts.utils as utils +# models +from models.LogReg import LogReg +from models.SSML.CoTraining import CoTraining +from models.SSML.LabelProp import LabelProp +from models.SSML.ShadowNN import ShadowNN +from models.SSML.ShadowCNN import ShadowCNN +# testing write +import joblib +import os + +# initialize sample data +start_date = datetime(2019, 2, 2) +delta = timedelta(seconds=1) +timestamps = np.arange(start_date, + start_date + (test_data.timesteps * delta), + delta).astype('datetime64[s]').astype('float64') + +live = np.full((len(timestamps),), test_data.livetime) +sample_val = 1.0 +spectra = np.full((len(timestamps), test_data.energy_bins), + np.full((1, test_data.energy_bins), sample_val)) +# setting up for rejected null hypothesis +rejected_H0_time = np.random.choice(spectra.shape[0], + test_data.timesteps//2, + replace=False) +spectra[rejected_H0_time] = 100.0 + +labels = np.full((spectra.shape[0],), 0) +labels[rejected_H0_time] = 1 + + +def test_utils(): + X, Ux, y, Uy = train_test_split(spectra, + labels, + test_size=0.5, + random_state=0) + Uy = np.full_like(Uy, -1) + + # test cross validation for supervised data using LogReg + params = {'max_iter': 2022, 'tol': 0.5, 'C': 5.0} + model = LogReg(params=params) + max_acc_model = utils.cross_validation(model=model, + X=X, + y=y, + params=params) + assert max_acc_model['accuracy'] >= 0.5 + + # test cross validation for supervised data and StratifiedKFold with LogReg + params = {'max_iter': 2022, 'tol': 0.5, 'C': 5.0} + model = LogReg(params=params) + max_acc_model = utils.cross_validation(model=model, + X=X, + y=y, + params=params, + stratified=True) + assert max_acc_model['accuracy'] >= 0.5 + + # test cross validation for SSML with LabelProp + params = {'gamma': 10, 'n_neighbors': 15, 'max_iter': 2022, 'tol': 0.5} + model = LabelProp(params=params) + max_acc_model = utils.cross_validation(model=model, + X=np.append(X, Ux, axis=0), + y=np.append(y, Uy, axis=0), + params=params, + stratified=True) + assert max_acc_model['accuracy'] >= 0.5 + + # data split for data visualization + X_train, X_test, y_train, y_test = train_test_split(X, + y, + test_size=0.2, + random_state=0) + + filename = 'test_pca' + utils.pca(X_train, y_train, Ux, np.full_like(Uy, -1), filename) + os.remove(filename+'.png') + + filename = 'test_multiD_pca' + utils.multiD_pca(X_train, y_train, Ux, np.full_like(Uy, -1), filename, n=5) + os.remove(filename+'.png') + + # normalization + 
normalizer = StandardScaler() + normalizer.fit(X_train) + + X_train = normalizer.transform(X_train) + X_test = normalizer.transform(X_test) + + # default behavior + model = LogReg(params=None, random_state=0) + model.train(X_train, y_train) + + # testing train and predict methods + pred, acc = model.predict(X_test, y_test) + + filename = 'test_cf' + utils.plot_cf(y_test, pred, title=filename, filename=filename) + os.remove(filename+'.png') + + +def test_LogReg(): + # test saving model input parameters + params = {'max_iter': 2022, 'tol': 0.5, 'C': 5.0} + model = LogReg(params=params) + + assert model.model.max_iter == params['max_iter'] + assert model.model.tol == params['tol'] + assert model.model.C == params['C'] + + X_train, X_test, y_train, y_test = train_test_split(spectra, + labels, + test_size=0.2, + random_state=0) + + # normalization + normalizer = StandardScaler() + normalizer.fit(X_train) + + X_train = normalizer.transform(X_train) + X_test = normalizer.transform(X_test) + + # default behavior + model = LogReg(params=None, random_state=0) + model.train(X_train, y_train) + + # testing train and predict methods + pred, acc = model.predict(X_test, y_test) + + assert acc > 0.7 + np.testing.assert_equal(pred, y_test) + + # testing hyperopt optimize methods + space = {'max_iter': scope.int(hp.quniform('max_iter', + 10, + 10000, + 10)), + 'tol': hp.loguniform('tol', 1e-5, 1e-1), + 'C': hp.uniform('C', 0.001, 1000.0) + } + data_dict = {'trainx': X_train, + 'testx': X_test, + 'trainy': y_train, + 'testy': y_test + } + model.optimize(space, data_dict, max_evals=2, verbose=True) + + assert model.best['accuracy'] >= model.worst['accuracy'] + assert model.best['status'] == 'ok' + + # testing model write to file method + filename = 'test_LogReg' + ext = '.joblib' + model.save(filename) + model_file = joblib.load(filename+ext) + assert model_file.best['params'] == model.best['params'] + + os.remove(filename+ext) + + +def test_CoTraining(): + # test saving model input parameters + params = {'max_iter': 2022, 'tol': 0.5, 'C': 5.0} + model = CoTraining(params=params) + + assert model.model1.max_iter == params['max_iter'] + assert model.model1.tol == params['tol'] + assert model.model1.C == params['C'] + + assert model.model2.max_iter == params['max_iter'] + assert model.model2.tol == params['tol'] + assert model.model2.C == params['C'] + + X, Ux, y, Uy = train_test_split(spectra, + labels, + test_size=0.5, + random_state=0) + X_train, X_test, y_train, y_test = train_test_split(X, + y, + test_size=0.2, + random_state=0) + + # normalization + normalizer = StandardScaler() + normalizer.fit(X_train) + + X_train = normalizer.transform(X_train) + X_test = normalizer.transform(X_test) + Ux = normalizer.transform(Ux) + + # default behavior + model = CoTraining(params=None, random_state=0) + model.train(X_train, y_train, Ux) + + # testing train and predict methods + pred, acc, *_ = model.predict(X_test, y_test) + + assert acc > 0.7 + np.testing.assert_equal(pred, y_test) + + # testing hyperopt optimize methods + space = {'max_iter': scope.int(hp.quniform('max_iter', + 10, + 10000, + 10)), + 'tol': hp.loguniform('tol', 1e-5, 1e-3), + 'C': hp.uniform('C', 1.0, 1000.0), + 'n_samples': scope.int(hp.quniform('n_samples', + 1, + 20, + 1)), + 'seed': 0 + } + data_dict = {'trainx': X_train, + 'testx': X_test, + 'trainy': y_train, + 'testy': y_test, + 'Ux': Ux + } + model.optimize(space, data_dict, max_evals=2, verbose=True) + + assert model.best['accuracy'] >= model.worst['accuracy'] + assert model.best['status'] 
== 'ok' + + # testing model plotting method + filename = 'test_plot' + model.plot_cotraining(model1_accs=model.best['model1_acc_history'], + model2_accs=model.best['model2_acc_history'], + filename=filename) + os.remove(filename+'.png') + + # testing model write to file method + filename = 'test_LogReg' + ext = '.joblib' + model.save(filename) + model_file = joblib.load(filename+ext) + assert model_file.best['params'] == model.best['params'] + + os.remove(filename+ext) + + +def test_LabelProp(): + # test saving model input parameters + params = {'gamma': 10, 'n_neighbors': 15, 'max_iter': 2022, 'tol': 0.5} + model = LabelProp(params=params) + + assert model.model.gamma == params['gamma'] + assert model.model.n_neighbors == params['n_neighbors'] + assert model.model.max_iter == params['max_iter'] + assert model.model.tol == params['tol'] + + # there should be no normalization on LabelProp data + # since it depends on the distances between samples + X, Ux, y, Uy = train_test_split(spectra, + labels, + test_size=0.5, + random_state=0) + X_train, X_test, y_train, y_test = train_test_split(X, + y, + test_size=0.2, + random_state=0) + + # default behavior + model = LabelProp(params=None, random_state=0) + model.train(X_train, y_train, Ux) + + # testing train and predict methods + pred, acc = model.predict(X_test, y_test) + + # the default n_neighbors(=7) from sklearn is too large + # for the size of this dataset + # therefore the accuracy is expected to be poor + # a better value for this dataset would be n_neighbors=2 + # (tested when specifying params in LabelProp.__init__) + assert acc >= 0.5 + # uninteresting test if LabelProp predicts all one class + # TODO: make the default params test meaningful + assert np.count_nonzero(pred == y_test) > 0 + + # testing hyperopt optimize methods + space = {'max_iter': scope.int(hp.quniform('max_iter', + 10, + 10000, + 10)), + 'tol': hp.loguniform('tol', 1e-6, 1e-4), + 'gamma': hp.uniform('gamma', 1, 50), + 'n_neighbors': scope.int(hp.quniform('n_neighbors', + 1, + X_train.shape[0], + 1)) + } + data_dict = {'trainx': X_train, + 'testx': X_test, + 'trainy': y_train, + 'testy': y_test, + 'Ux': Ux + } + model.optimize(space, data_dict, max_evals=2, verbose=True) + + assert model.best['accuracy'] >= model.worst['accuracy'] + assert model.best['status'] == 'ok' + + # testing model write to file method + filename = 'test_LogReg' + ext = '.joblib' + model.save(filename) + model_file = joblib.load(filename+ext) + assert model_file.best['params'] == model.best['params'] + + os.remove(filename+ext) + + +def test_ShadowNN(): + # check default parameter settings + model = ShadowNN() + assert model.params == {'binning': 1} + assert model.eaat is not None + assert model.eaat_opt is not None + assert model.xEnt is not None + assert model.input_length == 1000 + + X, Ux, y, Uy = train_test_split(spectra, + labels, + test_size=0.5, + random_state=0) + X_train, X_test, y_train, y_test = train_test_split(X, + y, + test_size=0.2, + random_state=0) + + # normalization + normalizer = StandardScaler() + normalizer.fit(X_train) + + X_train = normalizer.transform(X_train) + X_test = normalizer.transform(X_test) + Ux = normalizer.transform(Ux) + + params = {'hidden_layer': 10, + 'alpha': 0.1, + 'xi': 1e-3, + 'eps': 1.0, + 'lr': 0.1, + 'momentum': 0.9, + 'binning': 20} + # default behavior + model = ShadowNN(params=params, random_state=0) + acc_history = model.train(X_train, y_train, Ux, X_test, y_test) + + # testing train and predict methods + pred, acc = model.predict(X_test, 
y_test) + + # test for agreement between training and testing + # (since the same data is used for diagnostics in this test) + assert acc_history[-1] == acc + + # Shadow/PyTorch reports accuracies as percentages + # rather than decimals + # uninteresting test if Shadow predicts all one class + # TODO: make the default params test meaningful + assert np.count_nonzero(pred == y_test) > 0 + + # testing hyperopt optimize methods + space = {'hidden_layer': 10, + 'alpha': 0.1, + 'xi': 1e-3, + 'eps': 1.0, + 'lr': 0.1, + 'momentum': 0.9, + 'binning': scope.int(hp.quniform('binning', + 10, + 20, + 1)) + } + data_dict = {'trainx': X_train, + 'testx': X_test, + 'trainy': y_train, + 'testy': y_test, + 'Ux': Ux + } + model.optimize(space, data_dict, max_evals=2, verbose=True) + + assert model.best['accuracy'] >= model.worst['accuracy'] + assert model.best['status'] == 'ok' + + # testing model write to file method + filename = 'test_LogReg' + ext = '.joblib' + model.save(filename) + model_file = joblib.load(filename+ext) + assert model_file.best['params'] == model.best['params'] + + os.remove(filename+ext) + + +def test_ShadowCNN(): + # check default parameter settings + model = ShadowCNN() + assert model.params == {'binning': 1, 'batch_size': 1} + assert model.model is not None + assert model.eaat is not None + assert model.optimizer is not None + + X, Ux, y, Uy = train_test_split(spectra, + labels, + test_size=0.5, + random_state=0) + X_train, X_test, y_train, y_test = train_test_split(X, + y, + test_size=0.2, + random_state=0) + + # normalization + normalizer = StandardScaler() + normalizer.fit(X_train) + + X_train = normalizer.transform(X_train) + X_test = normalizer.transform(X_test) + Ux = normalizer.transform(Ux) + + params = {'layer1': 2, + 'kernel': 3, + 'alpha': 0.1, + 'xi': 1e-3, + 'eps': 1.0, + 'lr': 0.1, + 'momentum': 0.9, + 'binning': 20, + 'batch_size': 4, + 'drop_rate': 0.1} + + # default behavior + model = ShadowCNN(params=params, random_state=0) + losscurve, evalcurve = model.train(X_train, y_train, Ux, X_test, y_test) + + # testing train and predict methods + pred, acc = model.predict(X_test, y_test) + + # test for agreement between training and testing + # (since the same data is used for diagnostics in this test) + assert evalcurve[-1] == acc + + # Shadow/PyTorch reports accuracies as percentages + # rather than decimals + # uninteresting test if Shadow predicts all one class + # TODO: make the default params test meaningful + assert np.count_nonzero(pred == y_test) > 0 + + # testing hyperopt optimize methods + space = params + space['binning'] = scope.int(hp.quniform('binning', + 10, + 20, + 1)) + data_dict = {'trainx': X_train, + 'testx': X_test, + 'trainy': y_train, + 'testy': y_test, + 'Ux': Ux + } + model.optimize(space, data_dict, max_evals=2, verbose=True) + + assert model.best['accuracy'] >= model.worst['accuracy'] + assert model.best['status'] == 'ok' + + # testing model plotting method + filename = 'test_plot' + model.plot_training(losscurve=model.best['losscurve'], + evalcurve=model.best['evalcurve'], + filename=filename) + os.remove(filename+'.png') + + # testing model write to file method + filename = 'test_LogReg' + ext = '.joblib' + model.save(filename) + model_file = joblib.load(filename+ext) + assert model_file.best['params'] == model.best['params'] + + os.remove(filename+ext) From b335056f7cf357b4be9825e7d75d626f90f47035 Mon Sep 17 00:00:00 2001 From: Jordan Stomps Date: Mon, 31 Oct 2022 14:22:42 -0400 Subject: [PATCH 02/14] adding Logistic Regression class 
implementation --- models/LogReg.py | 162 +++++++++++++++++++++++++++++++++++++++++++++ models/__init__.py | 0 2 files changed, 162 insertions(+) create mode 100644 models/LogReg.py create mode 100644 models/__init__.py diff --git a/models/LogReg.py b/models/LogReg.py new file mode 100644 index 0000000..4ebfce2 --- /dev/null +++ b/models/LogReg.py @@ -0,0 +1,162 @@ +# For hyperopt (parameter optimization) +from hyperopt import STATUS_OK +# sklearn models +from sklearn import linear_model +# diagnostics +from sklearn.metrics import balanced_accuracy_score +from scripts.utils import run_hyperopt +import joblib + + +class LogReg: + ''' + Methods for deploying sklearn's logistic regression + implementation with hyperparameter optimization. + Data agnostic (i.e. user supplied data inputs). + TODO: Currently only supports binary classification. + Add multinomial functions and unit tests. + Add functionality for regression(?) + Inputs: + params: dictionary of logistic regression input functions. + keys max_iter, tol, and C supported. + random_state: int/float for reproducible intiailization. + ''' + + # only binary so far + def __init__(self, params=None, random_state=0): + # defaults to a fixed value for reproducibility + self.random_state = random_state + # dictionary of parameters for logistic regression model + self.params = params + if self.params is None: + self.model = linear_model.LogisticRegression( + random_state=self.random_state + ) + else: + self.model = linear_model.LogisticRegression( + random_state=self.random_state, + max_iter=params['max_iter'], + tol=params['tol'], + C=params['C'] + ) + + def fresh_start(self, params, data_dict): + ''' + Required method for hyperopt optimization. + Trains and tests a fresh logistic regression model + with given input parameters. + This method does not overwrite self.model (self.optimize() does). + Inputs: + params: dictionary of logistic regression input functions. + keys max_iter, tol, and C supported. + data_dict: compact data representation with the four requisite + data structures used for training and testing a model. + keys trainx, trainy, testx, and testy required. + ''' + + # unpack data + trainx = data_dict['trainx'] + trainy = data_dict['trainy'] + testx = data_dict['testx'] + testy = data_dict['testy'] + + # supervised logistic regression + clf = LogReg(params=params, random_state=self.random_state) + # train and test model + clf.train(trainx, trainy) + # uses balanced_accuracy accounts for class imbalanced data + clf_pred, acc = clf.predict(testx, testy) + + # loss function minimizes misclassification + return {'loss': 1-acc, + 'status': STATUS_OK, + 'model': clf.model, + 'params': params, + 'accuracy': acc} + + def optimize(self, space, data_dict, max_evals=50, verbose=True): + ''' + Wrapper method for using hyperopt (see utils.run_hyperopt + for more details). After hyperparameter optimization, results + are stored, the best model -overwrites- self.model, and the + best params -overwrite- self.params. + Inputs: + space: a hyperopt compliant dictionary with defined optimization + spaces. For example: + # quniform returns float, some parameters require int; + # use this to force int + space = {'max_iter': scope.int(hp.quniform('max_iter', + 10, + 10000, + 10)), + 'tol' : hp.loguniform('tol', 1e-5, 1e-1), + 'C' : hp.uniform('C', 0.001,1000.0) + } + See hyperopt docs for more information. + data_dict: compact data representation with the four requisite + data structures used for training and testing a model. 
+ keys trainx, trainy, testx, testy required. + max_evals: the number of epochs for hyperparameter optimization. + Each iteration is one set of hyperparameters trained + and tested on a fresh model. Convergence for simpler + models like logistic regression typically happens well + before 50 epochs, but can increase as more complex models, + more hyperparameters, and a larger hyperparameter space is tested. + verbose: boolean. If true, print results of hyperopt. + If false, print only the progress bar for optimization. + ''' + + best, worst = run_hyperopt(space=space, + model=self.fresh_start, + data_dict=data_dict, + max_evals=max_evals, + verbose=verbose) + + # save the results of hyperparameter optimization + self.best = best + self.model = best['model'] + self.params = best['params'] + self.worst = worst + + def train(self, trainx, trainy): + ''' + Wrapper method for sklearn's logisitic regression training method. + Inputs: + trainx: nxm feature vector/matrix for training model. + trainy: nxk class label vector/matrix for training model. + ''' + + # supervised logistic regression + self.model.fit(trainx, trainy) + + def predict(self, testx, testy=None): + ''' + Wrapper method for sklearn's logistic regression predict method. + Inputs: + testx: nxm feature vector/matrix for testing model. + testy: nxk class label vector/matrix for training model. + optional: if included, the predicted classes -and- + the resulting classification accuracy will be returned. + ''' + + pred = self.model.predict(testx) + + acc = None + if testy is not None: + # uses balanced_accuracy_score to account for class imbalance + acc = balanced_accuracy_score(testy, pred) + + return pred, acc + + def save(self, filename): + ''' + Save class instance to file using joblib. + Inputs: + filename: string filename to save object to file under. + The file must be saved with extension .joblib. + Added to filename if not included as input. 
+ ''' + + if filename[-7:] != '.joblib': + filename += '.joblib' + joblib.dump(self, filename) diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000..e69de29 From c66109929a58c69f1f0ba1f8bd023fb7fff6a546 Mon Sep 17 00:00:00 2001 From: Jordan Stomps Date: Mon, 31 Oct 2022 14:54:46 -0400 Subject: [PATCH 03/14] removing pytests for future ML implementations --- tests/test_models.py | 236 ------------------------------------------- 1 file changed, 236 deletions(-) diff --git a/tests/test_models.py b/tests/test_models.py index e3fb086..0669ccf 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -169,242 +169,6 @@ def test_LogReg(): os.remove(filename+ext) -def test_CoTraining(): - # test saving model input parameters - params = {'max_iter': 2022, 'tol': 0.5, 'C': 5.0} - model = CoTraining(params=params) - - assert model.model1.max_iter == params['max_iter'] - assert model.model1.tol == params['tol'] - assert model.model1.C == params['C'] - - assert model.model2.max_iter == params['max_iter'] - assert model.model2.tol == params['tol'] - assert model.model2.C == params['C'] - - X, Ux, y, Uy = train_test_split(spectra, - labels, - test_size=0.5, - random_state=0) - X_train, X_test, y_train, y_test = train_test_split(X, - y, - test_size=0.2, - random_state=0) - - # normalization - normalizer = StandardScaler() - normalizer.fit(X_train) - - X_train = normalizer.transform(X_train) - X_test = normalizer.transform(X_test) - Ux = normalizer.transform(Ux) - - # default behavior - model = CoTraining(params=None, random_state=0) - model.train(X_train, y_train, Ux) - - # testing train and predict methods - pred, acc, *_ = model.predict(X_test, y_test) - - assert acc > 0.7 - np.testing.assert_equal(pred, y_test) - - # testing hyperopt optimize methods - space = {'max_iter': scope.int(hp.quniform('max_iter', - 10, - 10000, - 10)), - 'tol': hp.loguniform('tol', 1e-5, 1e-3), - 'C': hp.uniform('C', 1.0, 1000.0), - 'n_samples': scope.int(hp.quniform('n_samples', - 1, - 20, - 1)), - 'seed': 0 - } - data_dict = {'trainx': X_train, - 'testx': X_test, - 'trainy': y_train, - 'testy': y_test, - 'Ux': Ux - } - model.optimize(space, data_dict, max_evals=2, verbose=True) - - assert model.best['accuracy'] >= model.worst['accuracy'] - assert model.best['status'] == 'ok' - - # testing model plotting method - filename = 'test_plot' - model.plot_cotraining(model1_accs=model.best['model1_acc_history'], - model2_accs=model.best['model2_acc_history'], - filename=filename) - os.remove(filename+'.png') - - # testing model write to file method - filename = 'test_LogReg' - ext = '.joblib' - model.save(filename) - model_file = joblib.load(filename+ext) - assert model_file.best['params'] == model.best['params'] - - os.remove(filename+ext) - - -def test_LabelProp(): - # test saving model input parameters - params = {'gamma': 10, 'n_neighbors': 15, 'max_iter': 2022, 'tol': 0.5} - model = LabelProp(params=params) - - assert model.model.gamma == params['gamma'] - assert model.model.n_neighbors == params['n_neighbors'] - assert model.model.max_iter == params['max_iter'] - assert model.model.tol == params['tol'] - - # there should be no normalization on LabelProp data - # since it depends on the distances between samples - X, Ux, y, Uy = train_test_split(spectra, - labels, - test_size=0.5, - random_state=0) - X_train, X_test, y_train, y_test = train_test_split(X, - y, - test_size=0.2, - random_state=0) - - # default behavior - model = LabelProp(params=None, random_state=0) - model.train(X_train, 
y_train, Ux) - - # testing train and predict methods - pred, acc = model.predict(X_test, y_test) - - # the default n_neighbors(=7) from sklearn is too large - # for the size of this dataset - # therefore the accuracy is expected to be poor - # a better value for this dataset would be n_neighbors=2 - # (tested when specifying params in LabelProp.__init__) - assert acc >= 0.5 - # uninteresting test if LabelProp predicts all one class - # TODO: make the default params test meaningful - assert np.count_nonzero(pred == y_test) > 0 - - # testing hyperopt optimize methods - space = {'max_iter': scope.int(hp.quniform('max_iter', - 10, - 10000, - 10)), - 'tol': hp.loguniform('tol', 1e-6, 1e-4), - 'gamma': hp.uniform('gamma', 1, 50), - 'n_neighbors': scope.int(hp.quniform('n_neighbors', - 1, - X_train.shape[0], - 1)) - } - data_dict = {'trainx': X_train, - 'testx': X_test, - 'trainy': y_train, - 'testy': y_test, - 'Ux': Ux - } - model.optimize(space, data_dict, max_evals=2, verbose=True) - - assert model.best['accuracy'] >= model.worst['accuracy'] - assert model.best['status'] == 'ok' - - # testing model write to file method - filename = 'test_LogReg' - ext = '.joblib' - model.save(filename) - model_file = joblib.load(filename+ext) - assert model_file.best['params'] == model.best['params'] - - os.remove(filename+ext) - - -def test_ShadowNN(): - # check default parameter settings - model = ShadowNN() - assert model.params == {'binning': 1} - assert model.eaat is not None - assert model.eaat_opt is not None - assert model.xEnt is not None - assert model.input_length == 1000 - - X, Ux, y, Uy = train_test_split(spectra, - labels, - test_size=0.5, - random_state=0) - X_train, X_test, y_train, y_test = train_test_split(X, - y, - test_size=0.2, - random_state=0) - - # normalization - normalizer = StandardScaler() - normalizer.fit(X_train) - - X_train = normalizer.transform(X_train) - X_test = normalizer.transform(X_test) - Ux = normalizer.transform(Ux) - - params = {'hidden_layer': 10, - 'alpha': 0.1, - 'xi': 1e-3, - 'eps': 1.0, - 'lr': 0.1, - 'momentum': 0.9, - 'binning': 20} - # default behavior - model = ShadowNN(params=params, random_state=0) - acc_history = model.train(X_train, y_train, Ux, X_test, y_test) - - # testing train and predict methods - pred, acc = model.predict(X_test, y_test) - - # test for agreement between training and testing - # (since the same data is used for diagnostics in this test) - assert acc_history[-1] == acc - - # Shadow/PyTorch reports accuracies as percentages - # rather than decimals - # uninteresting test if Shadow predicts all one class - # TODO: make the default params test meaningful - assert np.count_nonzero(pred == y_test) > 0 - - # testing hyperopt optimize methods - space = {'hidden_layer': 10, - 'alpha': 0.1, - 'xi': 1e-3, - 'eps': 1.0, - 'lr': 0.1, - 'momentum': 0.9, - 'binning': scope.int(hp.quniform('binning', - 10, - 20, - 1)) - } - data_dict = {'trainx': X_train, - 'testx': X_test, - 'trainy': y_train, - 'testy': y_test, - 'Ux': Ux - } - model.optimize(space, data_dict, max_evals=2, verbose=True) - - assert model.best['accuracy'] >= model.worst['accuracy'] - assert model.best['status'] == 'ok' - - # testing model write to file method - filename = 'test_LogReg' - ext = '.joblib' - model.save(filename) - model_file = joblib.load(filename+ext) - assert model_file.best['params'] == model.best['params'] - - os.remove(filename+ext) - - -def test_ShadowCNN(): # check default parameter settings model = ShadowCNN() assert model.params == {'binning': 1, 'batch_size': 
1} From a7d4bfe5d7e598559a7465b2eca8f8b19ac13531 Mon Sep 17 00:00:00 2001 From: Jordan Stomps Date: Mon, 31 Oct 2022 14:55:59 -0400 Subject: [PATCH 04/14] commenting cross validation test for ssml since it will be included in the LabelProp PR --- tests/test_models.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/test_models.py b/tests/test_models.py index 0669ccf..374573b 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -67,15 +67,15 @@ def test_utils(): stratified=True) assert max_acc_model['accuracy'] >= 0.5 - # test cross validation for SSML with LabelProp - params = {'gamma': 10, 'n_neighbors': 15, 'max_iter': 2022, 'tol': 0.5} - model = LabelProp(params=params) - max_acc_model = utils.cross_validation(model=model, - X=np.append(X, Ux, axis=0), - y=np.append(y, Uy, axis=0), - params=params, - stratified=True) - assert max_acc_model['accuracy'] >= 0.5 + # # test cross validation for SSML with LabelProp + # params = {'gamma': 10, 'n_neighbors': 15, 'max_iter': 2022, 'tol': 0.5} + # model = LabelProp(params=params) + # max_acc_model = utils.cross_validation(model=model, + # X=np.append(X, Ux, axis=0), + # y=np.append(y, Uy, axis=0), + # params=params, + # stratified=True) + # assert max_acc_model['accuracy'] >= 0.5 # data split for data visualization X_train, X_test, y_train, y_test = train_test_split(X, From 738720ee09cc578ace5da7888083f329b06a1c88 Mon Sep 17 00:00:00 2001 From: Jordan Stomps Date: Mon, 31 Oct 2022 15:01:08 -0400 Subject: [PATCH 05/14] removing leftovers from migration --- tests/test_models.py | 92 +------------------------------------------- 1 file changed, 1 insertion(+), 91 deletions(-) diff --git a/tests/test_models.py b/tests/test_models.py index 374573b..e452361 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -12,10 +12,6 @@ import scripts.utils as utils # models from models.LogReg import LogReg -from models.SSML.CoTraining import CoTraining -from models.SSML.LabelProp import LabelProp -from models.SSML.ShadowNN import ShadowNN -from models.SSML.ShadowCNN import ShadowCNN # testing write import joblib import os @@ -166,90 +162,4 @@ def test_LogReg(): model_file = joblib.load(filename+ext) assert model_file.best['params'] == model.best['params'] - os.remove(filename+ext) - - - # check default parameter settings - model = ShadowCNN() - assert model.params == {'binning': 1, 'batch_size': 1} - assert model.model is not None - assert model.eaat is not None - assert model.optimizer is not None - - X, Ux, y, Uy = train_test_split(spectra, - labels, - test_size=0.5, - random_state=0) - X_train, X_test, y_train, y_test = train_test_split(X, - y, - test_size=0.2, - random_state=0) - - # normalization - normalizer = StandardScaler() - normalizer.fit(X_train) - - X_train = normalizer.transform(X_train) - X_test = normalizer.transform(X_test) - Ux = normalizer.transform(Ux) - - params = {'layer1': 2, - 'kernel': 3, - 'alpha': 0.1, - 'xi': 1e-3, - 'eps': 1.0, - 'lr': 0.1, - 'momentum': 0.9, - 'binning': 20, - 'batch_size': 4, - 'drop_rate': 0.1} - - # default behavior - model = ShadowCNN(params=params, random_state=0) - losscurve, evalcurve = model.train(X_train, y_train, Ux, X_test, y_test) - - # testing train and predict methods - pred, acc = model.predict(X_test, y_test) - - # test for agreement between training and testing - # (since the same data is used for diagnostics in this test) - assert evalcurve[-1] == acc - - # Shadow/PyTorch reports accuracies as percentages - # rather than decimals - # 
uninteresting test if Shadow predicts all one class - # TODO: make the default params test meaningful - assert np.count_nonzero(pred == y_test) > 0 - - # testing hyperopt optimize methods - space = params - space['binning'] = scope.int(hp.quniform('binning', - 10, - 20, - 1)) - data_dict = {'trainx': X_train, - 'testx': X_test, - 'trainy': y_train, - 'testy': y_test, - 'Ux': Ux - } - model.optimize(space, data_dict, max_evals=2, verbose=True) - - assert model.best['accuracy'] >= model.worst['accuracy'] - assert model.best['status'] == 'ok' - - # testing model plotting method - filename = 'test_plot' - model.plot_training(losscurve=model.best['losscurve'], - evalcurve=model.best['evalcurve'], - filename=filename) - os.remove(filename+'.png') - - # testing model write to file method - filename = 'test_LogReg' - ext = '.joblib' - model.save(filename) - model_file = joblib.load(filename+ext) - assert model_file.best['params'] == model.best['params'] - - os.remove(filename+ext) + os.remove(filename+ext) \ No newline at end of file From b6a2c3630a2336323446232b9adfa7773fc89907 Mon Sep 17 00:00:00 2001 From: Jordan Stomps Date: Mon, 28 Nov 2022 14:31:27 -0600 Subject: [PATCH 06/14] condensing bool return Co-authored-by: Paul Wilson --- scripts/utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/utils.py b/scripts/utils.py index d91c826..f33f459 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -49,9 +49,7 @@ def early_stop(self, validation_loss): # keep track of whether validation loss has been decreasing # by a tolerable amount self.counter += 1 - if self.counter >= self.patience: - return True - return False + return self.counter >= self.patience def run_hyperopt(space, model, data_dict, max_evals=50, verbose=True): From de820fae34c9aa4839ce666315be0a67eb8dec76 Mon Sep 17 00:00:00 2001 From: Jordan Stomps Date: Mon, 28 Nov 2022 16:56:09 -0500 Subject: [PATCH 07/14] addressing comments from PR review --- models/LogReg.py | 17 ++++-- scripts/utils.py | 130 +++++++++++++++++++++---------------------- tests/test_models.py | 14 +++-- 3 files changed, 83 insertions(+), 78 deletions(-) diff --git a/models/LogReg.py b/models/LogReg.py index 4ebfce2..8886b94 100644 --- a/models/LogReg.py +++ b/models/LogReg.py @@ -24,6 +24,7 @@ class LogReg: # only binary so far def __init__(self, params=None, random_state=0): + keys = ['max_iter', 'tol', 'C'] # defaults to a fixed value for reproducibility self.random_state = random_state # dictionary of parameters for logistic regression model @@ -33,12 +34,16 @@ def __init__(self, params=None, random_state=0): random_state=self.random_state ) else: - self.model = linear_model.LogisticRegression( - random_state=self.random_state, - max_iter=params['max_iter'], - tol=params['tol'], - C=params['C'] - ) + if all(key in params.keys() for key in keys): + self.model = linear_model.LogisticRegression( + random_state=self.random_state, + max_iter=params['max_iter'], + tol=params['tol'], + C=params['C'] + ) + else: + missing = [key for key in keys if key not in params.keys()] + raise ValueError('Values for {} not in params'.format(missing)) def fresh_start(self, params, data_dict): ''' diff --git a/scripts/utils.py b/scripts/utils.py index d91c826..98edd66 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -191,30 +191,22 @@ def cross_validation(model, X, y, params, n_splits=3, return reports[np.argmax(accs)] -def pca(Lx, Ly, Ux, Uy, filename): +def pca(Lx, Ux, n): ''' - A function for computing and plotting 2D PCA. 
+ A function for computing n-component PCA. Inputs: Lx: labeled feature data. - Ly: class labels for labeled data. Ux: unlabeled feature data. - Uy: labels for unlabeled data (all labels should be -1). - filename: filename for saved plot. - The file must be saved with extension .joblib. - Added to filename if not included as input. + n: number of singular values to include in PCA analysis. ''' - plt.rcParams.update({'font.size': 20}) - # only saving colors for binary classification with unlabeled instances - col_dict = {-1: 'tab:gray', 0: 'tab:orange', 1: 'tab:blue'} - pcadata = np.append(Lx, Ux, axis=0) normalizer = StandardScaler() x = normalizer.fit_transform(pcadata) print(np.mean(pcadata), np.std(pcadata)) print(np.mean(x), np.std(x)) - pca = PCA(n_components=2) + pca = PCA(n_components=n) pca.fit_transform(x) print(pca.explained_variance_ratio_) print(pca.singular_values_) @@ -222,31 +214,37 @@ def pca(Lx, Ly, Ux, Uy, filename): principalComponents = pca.fit_transform(x) - fig, ax = plt.subplots(figsize=(10, 8)) - ax.set_xlabel('Principal Component 1', fontsize=15) - ax.set_ylabel('Principal Component 2', fontsize=15) + return principalComponents + + +def _plot_pca_data(principalComponents, Ly, Uy, ax): + ''' + Helper function for plot_pca that plots data for a given axis. + Inputs: + principalComponents: ndarray of shape (n_samples, n_components). + Ly: class labels for labeled data. + Uy: labels for unlabeled data (all labels should be -1). + ax: matplotlib-axis to plot on. + ''' + + # only saving colors for binary classification with unlabeled instances + col_dict = {-1: 'tab:gray', 0: 'tab:orange', 1: 'tab:blue'} + for idx, color in col_dict.items(): indices = np.where(np.append(Ly, Uy, axis=0) == idx)[0] ax.scatter(principalComponents[indices, 0], principalComponents[indices, 1], c=color, label='class '+str(idx)) - ax.grid() - ax.legend() - - if filename[-4:] != '.png': - filename += '.png' - fig.tight_layout() - fig.savefig(filename) + return ax -def multiD_pca(Lx, Ly, Ux, Uy, filename, n=2): +def plot_pca(principalComponents, Ly, Uy, filename, n=2): ''' A function for computing and plotting n-dimensional PCA. Inputs: - Lx: labeled feature data. + principalComponents: ndarray of shape (n_samples, n_components). Ly: class labels for labeled data. - Ux: unlabeled feature data. Uy: labels for unlabeled data (all labels should be -1). filename: filename for saved plot. The file must be saved with extension .joblib. 
@@ -255,21 +253,6 @@ def multiD_pca(Lx, Ly, Ux, Uy, filename, n=2): ''' plt.rcParams.update({'font.size': 20}) - # only saving colors for binary classification with unlabeled instances - col_dict = {-1: 'tab:gray', 0: 'tab:orange', 1: 'tab:blue'} - - pcadata = np.append(Lx, Ux, axis=0) - normalizer = StandardScaler() - x = normalizer.fit_transform(pcadata) - print(np.mean(pcadata), np.std(pcadata)) - print(np.mean(x), np.std(x)) - - n = 2 - pca = PCA(n_components=n) - principalComponents = pca.fit_transform(x) - print(pca.explained_variance_ratio_) - print(pca.singular_values_) - print(pca.components_) alph = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", @@ -277,30 +260,36 @@ def multiD_pca(Lx, Ly, Ux, Uy, filename, n=2): "Y", "Z"] jobs = alph[:n] - fig, axes = plt.subplots(n, n, figsize=(15, 15)) - - for row in range(axes.shape[0]): - for col in range(axes.shape[1]): - ax = axes[row, col] - if row == col: - ax.tick_params( - axis='both', which='both', - bottom='off', top='off', - labelbottom='off', - left='off', right='off', - labelleft='off' - ) - ax.text(0.5, 0.5, jobs[row], horizontalalignment='center') - else: - for idx, color in col_dict.items(): - indices = np.where(np.append(Ly, Uy, axis=0) == idx)[0] - ax.scatter(principalComponents[indices, row], - principalComponents[indices, col], - c=color, - label='class '+str(idx)) - fig.tight_layout() + # only one plot is needed for n=2 + if n == 2: + fig, ax = plt.subplots(figsize=(10, 8)) + ax.set_xlabel('PC '+jobs[0], fontsize=15) + ax.set_ylabel('PC '+jobs[1], fontsize=15) + ax = _plot_pca_data(principalComponents, Ly, Uy, ax) + ax.grid() + ax.legend() + else: + fig, axes = plt.subplots(n, n, figsize=(15, 15)) + for row in range(axes.shape[0]): + for col in range(axes.shape[1]): + ax = axes[row, col] + # blank label plot + if row == col: + ax.tick_params( + axis='both', which='both', + bottom='off', top='off', + labelbottom='off', + left='off', right='off', + labelleft='off' + ) + ax.text(0.5, 0.5, jobs[row], horizontalalignment='center') + # PCA results + else: + ax = _plot_pca_data(principalComponents, Ly, Uy, ax) + if filename[-4:] != '.png': filename += '.png' + fig.tight_layout() fig.savefig(filename) @@ -315,14 +304,19 @@ def plot_cf(testy, predy, title, filename): ''' cf_matrix = confusion_matrix(testy, predy) - ax = sns.heatmap(cf_matrix, annot=True, cmap='Blues') + fig, ax = plt.subplots(figsize=(10, 8)) + ax = sns.heatmap(cf_matrix, annot=True, cmap='Blues', + vmin=0, vmax=100, ax=ax, annot_kws={'fontsize': 40}) + # increase fontsize on colorbar + cbar = ax.collections[0].colorbar + cbar.ax.tick_params(labelsize=25) - ax.set_title(title) - ax.set_xlabel('\nPredicted Values') - ax.set_ylabel('Actual Values ') + ax.set_title(title, fontsize=30) + ax.set_xlabel('\nPredicted Values', fontsize=25) + ax.set_ylabel('Actual Values ', fontsize=25) # Ticket labels - List must be in alphabetical order - ax.xaxis.set_ticklabels(['0(SNM)', '1(other)']) - ax.yaxis.set_ticklabels(['0(SNM)', '1(other)']) + ax.xaxis.set_ticklabels(['0(SNM)', '1(other)'], fontsize=25) + ax.yaxis.set_ticklabels(['0(SNM)', '1(other)'], fontsize=25) # Save the visualization of the Confusion Matrix. 
plt.savefig(filename) diff --git a/tests/test_models.py b/tests/test_models.py index e452361..de56b1c 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,5 +1,6 @@ # diagnostics import numpy as np +import pytest from datetime import datetime, timedelta # testing models from sklearn.model_selection import train_test_split @@ -80,12 +81,13 @@ def test_utils(): random_state=0) filename = 'test_pca' - utils.pca(X_train, y_train, Ux, np.full_like(Uy, -1), filename) + pcs = utils.pca(X_train, Ux, 2) + utils.plot_pca(pcs, y_train, np.full_like(Uy, -1), filename, 2) os.remove(filename+'.png') - filename = 'test_multiD_pca' - utils.multiD_pca(X_train, y_train, Ux, np.full_like(Uy, -1), filename, n=5) - os.remove(filename+'.png') + # filename = 'test_multiD_pca' + # utils.multiD_pca(X_train, y_train, Ux, np.full_like(Uy, -1), filename, n=5) + # os.remove(filename+'.png') # normalization normalizer = StandardScaler() @@ -108,6 +110,10 @@ def test_utils(): def test_LogReg(): # test saving model input parameters + # missing 'C' key + params = {'max_iter': 2022, 'tol': 0.5} + with pytest.raises(ValueError): + LogReg(params=params) params = {'max_iter': 2022, 'tol': 0.5, 'C': 5.0} model = LogReg(params=params) From 49ed49133885db088c129b0fcfac81db190efce4 Mon Sep 17 00:00:00 2001 From: Jordan Stomps Date: Wed, 7 Dec 2022 10:38:05 -0500 Subject: [PATCH 08/14] splitting utils test between cross_validation and pca --- tests/test_models.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_models.py b/tests/test_models.py index de56b1c..4253f5d 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -38,7 +38,7 @@ labels[rejected_H0_time] = 1 -def test_utils(): +def test_cross_validation(): X, Ux, y, Uy = train_test_split(spectra, labels, test_size=0.5, @@ -74,6 +74,13 @@ def test_utils(): # stratified=True) # assert max_acc_model['accuracy'] >= 0.5 +def test_pca(): + # unlabeled data split + X, Ux, y, Uy = train_test_split(spectra, + labels, + test_size=0.5, + random_state=0) + Uy = np.full_like(Uy, -1) # data split for data visualization X_train, X_test, y_train, y_test = train_test_split(X, y, From 785e4245ace4ecaedd898d8fc4acf705b2c1d83d Mon Sep 17 00:00:00 2001 From: Jordan Stomps Date: Thu, 15 Dec 2022 14:50:45 -0500 Subject: [PATCH 09/14] implementing more meaningful test data for testing cross validation --- scripts/utils.py | 9 ++-- tests/test_models.py | 100 ++++++++++++++++++++++++------------------- 2 files changed, 61 insertions(+), 48 deletions(-) diff --git a/scripts/utils.py b/scripts/utils.py index 2926b28..85c113d 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -97,7 +97,7 @@ def run_hyperopt(space, model, data_dict, max_evals=50, verbose=True): def cross_validation(model, X, y, params, n_splits=3, - stratified=False, random_state=None): + stratified=False, random_state=None, shuffle=True): ''' Perform K-Fold cross validation using sklearn and a given model. The model *must* have a fresh_start method (see models in RadClass/models). @@ -139,6 +139,7 @@ def cross_validation(model, X, y, params, n_splits=3, stratified: bool; if True, balance the K-Folds to have roughly the same proportion of samples from each class. random_state: seed for reproducility. + shuffle: bool; if True, shuffle the data before conducting K-folds. 
''' # return lists @@ -147,10 +148,10 @@ def cross_validation(model, X, y, params, n_splits=3, if stratified: cv = StratifiedKFold(n_splits=n_splits, random_state=random_state, - shuffle=True) + shuffle=shuffle) else: cv = KFold(n_splits=n_splits, random_state=random_state, - shuffle=True) + shuffle=shuffle) # separate unlabeled data if included Ux = None @@ -186,7 +187,7 @@ def cross_validation(model, X, y, params, n_splits=3, print('Max accuracy:', np.max(accs)) print('All accuracy:', accs) # return the results of fresh_start for the max accuracy model - return reports[np.argmax(accs)] + return reports def pca(Lx, Ux, n): diff --git a/tests/test_models.py b/tests/test_models.py index 4253f5d..52b59ec 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -5,6 +5,7 @@ # testing models from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler +from sklearn.datasets import make_classification import tests.test_data as test_data # hyperopt from hyperopt.pyll.base import scope @@ -17,54 +18,64 @@ import joblib import os -# initialize sample data -start_date = datetime(2019, 2, 2) -delta = timedelta(seconds=1) -timestamps = np.arange(start_date, - start_date + (test_data.timesteps * delta), - delta).astype('datetime64[s]').astype('float64') -live = np.full((len(timestamps),), test_data.livetime) -sample_val = 1.0 -spectra = np.full((len(timestamps), test_data.energy_bins), - np.full((1, test_data.energy_bins), sample_val)) -# setting up for rejected null hypothesis -rejected_H0_time = np.random.choice(spectra.shape[0], - test_data.timesteps//2, - replace=False) -spectra[rejected_H0_time] = 100.0 - -labels = np.full((spectra.shape[0],), 0) -labels[rejected_H0_time] = 1 +@pytest.fixture(scope='module', autouse=True) +def data_init(): + # initialize sample data + start_date = datetime(2019, 2, 2) + delta = timedelta(seconds=1) + pytest.timestamps = np.arange(start_date, + start_date + (test_data.timesteps * delta), + delta + ).astype('datetime64[s]').astype('float64') + + pytest.live = np.full((len(pytest.timestamps),), test_data.livetime) + # 4/5 of the data will be separable + spectra_sep, labels_sep = make_classification( + n_samples=int(0.8*len(pytest.timestamps)), + n_features=test_data.energy_bins, + n_informative=test_data.energy_bins, + n_redundant=0, + n_classes=2, + class_sep=10.0) + # 1/5 of the data will be much less separable + spectra_unsep, labels_unsep = make_classification( + n_samples=int(0.2*len(pytest.timestamps)), + n_features=test_data.energy_bins, + n_informative=test_data.energy_bins, + n_redundant=0, + n_classes=2, + class_sep=0.5) + pytest.spectra = np.append(spectra_sep, spectra_unsep, axis=0) + pytest.labels = np.append(labels_sep, labels_unsep, axis=0) def test_cross_validation(): - X, Ux, y, Uy = train_test_split(spectra, - labels, - test_size=0.5, - random_state=0) - Uy = np.full_like(Uy, -1) - # test cross validation for supervised data using LogReg params = {'max_iter': 2022, 'tol': 0.5, 'C': 5.0} model = LogReg(params=params) - max_acc_model = utils.cross_validation(model=model, - X=X, - y=y, - params=params) - assert max_acc_model['accuracy'] >= 0.5 + results = utils.cross_validation(model=model, + X=pytest.spectra, + y=pytest.labels, + params=params, + n_splits=5, + shuffle=False) + accs = [res['accuracy'] for res in results] + # the last K-fold will use the unseparable data for testing + # therefore its accuracy should be less than all other folds + assert (accs[-1] < accs[:-1]).all() # test cross validation 
for supervised data and StratifiedKFold with LogReg - params = {'max_iter': 2022, 'tol': 0.5, 'C': 5.0} - model = LogReg(params=params) - max_acc_model = utils.cross_validation(model=model, - X=X, - y=y, - params=params, - stratified=True) - assert max_acc_model['accuracy'] >= 0.5 - - # # test cross validation for SSML with LabelProp + # params = {'max_iter': 2022, 'tol': 0.5, 'C': 5.0} + # model = LogReg(params=params) + # max_acc_model = utils.cross_validation(model=model, + # X=X, + # y=y, + # params=params, + # stratified=True) + # assert max_acc_model['accuracy'] >= 0.5 + + # test cross validation for SSML with LabelProp # params = {'gamma': 10, 'n_neighbors': 15, 'max_iter': 2022, 'tol': 0.5} # model = LabelProp(params=params) # max_acc_model = utils.cross_validation(model=model, @@ -74,10 +85,11 @@ def test_cross_validation(): # stratified=True) # assert max_acc_model['accuracy'] >= 0.5 + def test_pca(): # unlabeled data split - X, Ux, y, Uy = train_test_split(spectra, - labels, + X, Ux, y, Uy = train_test_split(pytest.spectra, + pytest.labels, test_size=0.5, random_state=0) Uy = np.full_like(Uy, -1) @@ -128,8 +140,8 @@ def test_LogReg(): assert model.model.tol == params['tol'] assert model.model.C == params['C'] - X_train, X_test, y_train, y_test = train_test_split(spectra, - labels, + X_train, X_test, y_train, y_test = train_test_split(pytest.spectra, + pytest.labels, test_size=0.2, random_state=0) @@ -175,4 +187,4 @@ def test_LogReg(): model_file = joblib.load(filename+ext) assert model_file.best['params'] == model.best['params'] - os.remove(filename+ext) \ No newline at end of file + os.remove(filename+ext) From fd3cf535a26bef028b63fec00d7b0d421a49c095 Mon Sep 17 00:00:00 2001 From: Jordan Stomps Date: Thu, 15 Dec 2022 14:55:34 -0500 Subject: [PATCH 10/14] changing print statements to logging --- scripts/utils.py | 25 +++++++++++++------------ tests/test_models.py | 1 - 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/scripts/utils.py b/scripts/utils.py index 85c113d..d1ee480 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -6,6 +6,7 @@ from functools import partial # diagnostics from sklearn.metrics import confusion_matrix +import logging # pca from sklearn.preprocessing import StandardScaler from sklearn.decomposition import PCA @@ -88,10 +89,10 @@ def run_hyperopt(space, model, data_dict, max_evals=50, verbose=True): worst = trials.results[np.argmax([r['loss'] for r in trials.results])] if verbose: - print('best accuracy:', 1-best['loss']) - print('best params:', best['params']) - print('worst accuracy:', 1-worst['loss']) - print('worst params:', worst['params']) + logging.info('best accuracy:', 1-best['loss']) + logging.info('best params:', best['params']) + logging.info('worst accuracy:', 1-worst['loss']) + logging.info('worst params:', worst['params']) return best, worst @@ -183,9 +184,9 @@ def cross_validation(model, X, y, params, n_splits=3, reports = np.append(reports, results) # report cross validation results - print('Average accuracy:', np.mean(accs)) - print('Max accuracy:', np.max(accs)) - print('All accuracy:', accs) + logging.info('Average accuracy:', np.mean(accs)) + logging.info('Max accuracy:', np.max(accs)) + logging.info('All accuracy:', accs) # return the results of fresh_start for the max accuracy model return reports @@ -202,14 +203,14 @@ def pca(Lx, Ux, n): pcadata = np.append(Lx, Ux, axis=0) normalizer = StandardScaler() x = normalizer.fit_transform(pcadata) - print(np.mean(pcadata), np.std(pcadata)) - print(np.mean(x), np.std(x)) 
+ logging.info(np.mean(pcadata), np.std(pcadata)) + logging.info(np.mean(x), np.std(x)) pca = PCA(n_components=n) pca.fit_transform(x) - print(pca.explained_variance_ratio_) - print(pca.singular_values_) - print(pca.components_) + logging.info(pca.explained_variance_ratio_) + logging.info(pca.singular_values_) + logging.info(pca.components_) principalComponents = pca.fit_transform(x) diff --git a/tests/test_models.py b/tests/test_models.py index 52b59ec..25fa1b1 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -160,7 +160,6 @@ def test_LogReg(): pred, acc = model.predict(X_test, y_test) assert acc > 0.7 - np.testing.assert_equal(pred, y_test) # testing hyperopt optimize methods space = {'max_iter': scope.int(hp.quniform('max_iter', From 56da5309b184808fe37cc4280f3285a8b001c48f Mon Sep 17 00:00:00 2001 From: Jordan Stomps Date: Thu, 15 Dec 2022 15:10:52 -0500 Subject: [PATCH 11/14] clarifying the testing logic for ML classes --- tests/test_models.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_models.py b/tests/test_models.py index 25fa1b1..23f7518 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -159,7 +159,10 @@ def test_LogReg(): # testing train and predict methods pred, acc = model.predict(X_test, y_test) - assert acc > 0.7 + # since the test data used here is synthetic/toy data (i.e. uninteresting), + # the trained model should be at least better than a 50-50 guess + # if it was worse, something would be wrong with the ML class + assert acc > 0.5 # testing hyperopt optimize methods space = {'max_iter': scope.int(hp.quniform('max_iter', @@ -176,6 +179,8 @@ def test_LogReg(): } model.optimize(space, data_dict, max_evals=2, verbose=True) + # again, the data does not guarantee that an improvement will be found + # from hyperopt, so long as something is not wrong with the class assert model.best['accuracy'] >= model.worst['accuracy'] assert model.best['status'] == 'ok' From 533bbb24934883f49363528482b4814c23fe01e0 Mon Sep 17 00:00:00 2001 From: Jordan Stomps Date: Mon, 16 Jan 2023 09:26:22 -0500 Subject: [PATCH 12/14] moving default init params to support kwargs --- models/LogReg.py | 34 ++++++++++++++-------------------- tests/test_models.py | 8 +++----- 2 files changed, 17 insertions(+), 25 deletions(-) diff --git a/models/LogReg.py b/models/LogReg.py index 8886b94..316a82f 100644 --- a/models/LogReg.py +++ b/models/LogReg.py @@ -23,27 +23,21 @@ class LogReg: ''' # only binary so far - def __init__(self, params=None, random_state=0): - keys = ['max_iter', 'tol', 'C'] + def __init__(self, **kwargs): + # supported keys = ['max_iter', 'tol', 'C'] # defaults to a fixed value for reproducibility - self.random_state = random_state - # dictionary of parameters for logistic regression model - self.params = params - if self.params is None: - self.model = linear_model.LogisticRegression( - random_state=self.random_state - ) - else: - if all(key in params.keys() for key in keys): - self.model = linear_model.LogisticRegression( - random_state=self.random_state, - max_iter=params['max_iter'], - tol=params['tol'], - C=params['C'] - ) - else: - missing = [key for key in keys if key not in params.keys()] - raise ValueError('Values for {} not in params'.format(missing)) + self.random_state = kwargs.pop('random_state', 0) + # parameters for logistic regression model: + # defaults to sklearn.linear_model.LogisticRegression default vals + self.max_iter = kwargs.pop('max_iter', 100) + self.tol = kwargs.pop('tol', 0.0001) + self.C = 
kwargs.pop('C', 1.0) + self.model = linear_model.LogisticRegression( + random_state=self.random_state, + max_iter=self.max_iter, + tol=self.tol, + C=self.C + ) def fresh_start(self, params, data_dict): ''' diff --git a/tests/test_models.py b/tests/test_models.py index 23f7518..5b66f65 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -129,12 +129,10 @@ def test_pca(): def test_LogReg(): # test saving model input parameters - # missing 'C' key - params = {'max_iter': 2022, 'tol': 0.5} - with pytest.raises(ValueError): - LogReg(params=params) params = {'max_iter': 2022, 'tol': 0.5, 'C': 5.0} - model = LogReg(params=params) + model = LogReg(max_iter=params['max_iter'], + tol=params['tol'], + C=params['C']) assert model.model.max_iter == params['max_iter'] assert model.model.tol == params['tol'] From f731a56b1f94573f235e50e0d045c6ca4c7f8c02 Mon Sep 17 00:00:00 2001 From: Jordan Stomps Date: Mon, 16 Jan 2023 09:49:22 -0500 Subject: [PATCH 13/14] adding pragmas for coveralls to ignore PCA functions --- scripts/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/utils.py b/scripts/utils.py index d1ee480..87502e5 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -191,7 +191,7 @@ def cross_validation(model, X, y, params, n_splits=3, return reports -def pca(Lx, Ux, n): +def pca(Lx, Ux, n): # pragma: no cover ''' A function for computing n-component PCA. Inputs: @@ -217,7 +217,7 @@ def pca(Lx, Ux, n): return principalComponents -def _plot_pca_data(principalComponents, Ly, Uy, ax): +def _plot_pca_data(principalComponents, Ly, Uy, ax): # pragma: no cover ''' Helper function for plot_pca that plots data for a given axis. Inputs: @@ -239,7 +239,7 @@ def _plot_pca_data(principalComponents, Ly, Uy, ax): return ax -def plot_pca(principalComponents, Ly, Uy, filename, n=2): +def plot_pca(principalComponents, Ly, Uy, filename, n=2): # pragma: no cover ''' A function for computing and plotting n-dimensional PCA. Inputs: @@ -293,7 +293,7 @@ def plot_pca(principalComponents, Ly, Uy, filename, n=2): fig.savefig(filename) -def plot_cf(testy, predy, title, filename): +def plot_cf(testy, predy, title, filename): # pragma: no cover ''' Uses sklearn metric to compute a confusion matrix for visualization Inputs: From c9cb2498dc0815bd21e19b2a9d66ea5e539528a8 Mon Sep 17 00:00:00 2001 From: Jordan Stomps Date: Mon, 16 Jan 2023 09:59:37 -0500 Subject: [PATCH 14/14] pep8 --- scripts/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/utils.py b/scripts/utils.py index 87502e5..6ea4add 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -147,10 +147,10 @@ def cross_validation(model, X, y, params, n_splits=3, accs = [] reports = [] - if stratified: + if stratified: # pragma: no cover cv = StratifiedKFold(n_splits=n_splits, random_state=random_state, shuffle=shuffle) - else: + else: # pragma: no cover cv = KFold(n_splits=n_splits, random_state=random_state, shuffle=shuffle) @@ -191,7 +191,7 @@ def cross_validation(model, X, y, params, n_splits=3, return reports -def pca(Lx, Ux, n): # pragma: no cover +def pca(Lx, Ux, n): # pragma: no cover ''' A function for computing n-component PCA. Inputs: @@ -293,7 +293,7 @@ def plot_pca(principalComponents, Ly, Uy, filename, n=2): # pragma: no cover fig.savefig(filename) -def plot_cf(testy, predy, title, filename): # pragma: no cover +def plot_cf(testy, predy, title, filename): # pragma: no cover ''' Uses sklearn metric to compute a confusion matrix for visualization Inputs:
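
--
A minimal usage sketch tying together the refactored pieces above (the kwargs-style LogReg constructor and the list-of-fold-reports now returned by cross_validation). This is illustrative only, not part of the patches: the import paths (models.LogReg, scripts.utils), the synthetic data, and the explicit shuffle keyword are assumptions mirroring the tests in this series.

import logging
import numpy as np
from sklearn.datasets import make_classification

from models.LogReg import LogReg   # assumed import path, as in tests/test_models.py
from scripts import utils          # assumed import path for scripts/utils.py

logging.basicConfig(level=logging.INFO)

# synthetic two-class data, loosely mirroring the pytest fixture above
X, y = make_classification(n_samples=500, n_features=100,
                           n_informative=100, n_redundant=0,
                           n_classes=2, class_sep=2.0)

# kwargs-style construction from the LogReg refactor above
params = {'max_iter': 2022, 'tol': 0.5, 'C': 5.0}
model = LogReg(max_iter=params['max_iter'],
               tol=params['tol'],
               C=params['C'])

# cross_validation now returns one fresh_start report per fold;
# aggregate statistics are left to the caller
reports = utils.cross_validation(model=model, X=X, y=y,
                                 params=params, n_splits=5,
                                 shuffle=True)
accs = [r['accuracy'] for r in reports]
logging.info('mean CV accuracy: %s, max: %s', np.mean(accs), np.max(accs))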