diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index d88f9c7..973f71c 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -41,7 +41,7 @@ jobs:
     - name: Test with pytest
       run: |
         python3 -m pytest
-        python3 -m coverage run --source=./RadClass/ -m pytest
+        python3 -m coverage run --source=./RadClass/,./models/,./scripts/ -m pytest
         python3 -m coverage report
         python3 -m coverage html
         COVERALLS_REPO_TOKEN=${{ secrets.COVERALLS_REPO_TOKEN }} python3 -m coveralls --service=github
diff --git a/README.md b/README.md
index b08bd07..42245fa 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,13 @@ Versions 3.6-3.9 are currently supported by tests. The following Python packages
 * h5py
 * numpy
 * progressbar2
+* matplotlib
+* seaborn
 * scipy
+* sklearn
+* hyperopt
+* torch
+* shadow-ssml
 
 Modules can be imported from the repository directory (e.g. `from RadClass.H0 import H0`) or `RadClass` can be installed using pip:
 
diff --git a/models/LogReg.py b/models/LogReg.py
new file mode 100644
index 0000000..4ebfce2
--- /dev/null
+++ b/models/LogReg.py
@@ -0,0 +1,162 @@
+# For hyperopt (parameter optimization)
+from hyperopt import STATUS_OK
+# sklearn models
+from sklearn import linear_model
+# diagnostics
+from sklearn.metrics import balanced_accuracy_score
+from scripts.utils import run_hyperopt
+import joblib
+
+
+class LogReg:
+    '''
+    Methods for deploying sklearn's logistic regression
+    implementation with hyperparameter optimization.
+    Data agnostic (i.e. user supplied data inputs).
+    TODO: Currently only supports binary classification.
+        Add multinomial functions and unit tests.
+        Add functionality for regression(?)
+    Inputs:
+    params: dictionary of logistic regression input functions.
+        keys max_iter, tol, and C supported.
+    random_state: int/float for reproducible intiailization.
+    '''
+
+    # only binary so far
+    def __init__(self, params=None, random_state=0):
+        # defaults to a fixed value for reproducibility
+        self.random_state = random_state
+        # dictionary of parameters for logistic regression model
+        self.params = params
+        if self.params is None:
+            self.model = linear_model.LogisticRegression(
+                            random_state=self.random_state
+                        )
+        else:
+            self.model = linear_model.LogisticRegression(
+                            random_state=self.random_state,
+                            max_iter=params['max_iter'],
+                            tol=params['tol'],
+                            C=params['C']
+                        )
+
+    def fresh_start(self, params, data_dict):
+        '''
+        Required method for hyperopt optimization.
+        Trains and tests a fresh logistic regression model
+        with given input parameters.
+        This method does not overwrite self.model (self.optimize() does).
+        Inputs:
+        params: dictionary of logistic regression input functions.
+            keys max_iter, tol, and C supported.
+        data_dict: compact data representation with the four requisite
+            data structures used for training and testing a model.
+            keys trainx, trainy, testx, and testy required.
+        '''
+
+        # unpack data
+        trainx = data_dict['trainx']
+        trainy = data_dict['trainy']
+        testx = data_dict['testx']
+        testy = data_dict['testy']
+
+        # supervised logistic regression
+        clf = LogReg(params=params, random_state=self.random_state)
+        # train and test model
+        clf.train(trainx, trainy)
+        # uses balanced_accuracy accounts for class imbalanced data
+        clf_pred, acc = clf.predict(testx, testy)
+
+        # loss function minimizes misclassification
+        return {'loss': 1-acc,
+                'status': STATUS_OK,
+                'model': clf.model,
+                'params': params,
+                'accuracy': acc}
+
+    def optimize(self, space, data_dict, max_evals=50, verbose=True):
+        '''
+        Wrapper method for using hyperopt (see utils.run_hyperopt
+        for more details). After hyperparameter optimization, results
+        are stored, the best model -overwrites- self.model, and the
+        best params -overwrite- self.params.
+        Inputs:
+        space: a hyperopt compliant dictionary with defined optimization
+            spaces. For example:
+                # quniform returns float, some parameters require int;
+                # use this to force int
+                space = {'max_iter': scope.int(hp.quniform('max_iter',
+                                                           10,
+                                                           10000,
+                                                           10)),
+                        'tol'      : hp.loguniform('tol', 1e-5, 1e-1),
+                        'C'        : hp.uniform('C', 0.001,1000.0)
+                        }
+            See hyperopt docs for more information.
+        data_dict: compact data representation with the four requisite
+            data structures used for training and testing a model.
+            keys trainx, trainy, testx, testy required.
+        max_evals: the number of epochs for hyperparameter optimization.
+            Each iteration is one set of hyperparameters trained
+            and tested on a fresh model. Convergence for simpler
+            models like logistic regression typically happens well
+            before 50 epochs, but can increase as more complex models,
+            more hyperparameters, and a larger hyperparameter space is tested.
+        verbose: boolean. If true, print results of hyperopt.
+            If false, print only the progress bar for optimization.
+        '''
+
+        best, worst = run_hyperopt(space=space,
+                                   model=self.fresh_start,
+                                   data_dict=data_dict,
+                                   max_evals=max_evals,
+                                   verbose=verbose)
+
+        # save the results of hyperparameter optimization
+        self.best = best
+        self.model = best['model']
+        self.params = best['params']
+        self.worst = worst
+
+    def train(self, trainx, trainy):
+        '''
+        Wrapper method for sklearn's logisitic regression training method.
+        Inputs:
+        trainx: nxm feature vector/matrix for training model.
+        trainy: nxk class label vector/matrix for training model.
+        '''
+
+        # supervised logistic regression
+        self.model.fit(trainx, trainy)
+
+    def predict(self, testx, testy=None):
+        '''
+        Wrapper method for sklearn's logistic regression predict method.
+        Inputs:
+        testx: nxm feature vector/matrix for testing model.
+        testy: nxk class label vector/matrix for training model.
+            optional: if included, the predicted classes -and-
+            the resulting classification accuracy will be returned.
+        '''
+
+        pred = self.model.predict(testx)
+
+        acc = None
+        if testy is not None:
+            # uses balanced_accuracy_score to account for class imbalance
+            acc = balanced_accuracy_score(testy, pred)
+
+        return pred, acc
+
+    def save(self, filename):
+        '''
+        Save class instance to file using joblib.
+        Inputs:
+        filename: string filename to save object to file under.
+            The file must be saved with extension .joblib.
+            Added to filename if not included as input.
+        '''
+
+        if filename[-7:] != '.joblib':
+            filename += '.joblib'
+        joblib.dump(self, filename)
diff --git a/models/SSML/CoTraining.py b/models/SSML/CoTraining.py
new file mode 100644
index 0000000..e6757bd
--- /dev/null
+++ b/models/SSML/CoTraining.py
@@ -0,0 +1,335 @@
+import numpy as np
+import matplotlib.pyplot as plt
+# For hyperopt (parameter optimization)
+from hyperopt import STATUS_OK
+# sklearn models
+from sklearn import linear_model
+# diagnostics
+from sklearn.metrics import balanced_accuracy_score
+from scripts.utils import run_hyperopt
+import joblib
+
+
+class CoTraining:
+    '''
+    Methods for deploying a basic co-training with logistic
+    regression implementation with hyperparameter optimization.
+    Data agnostic (i.e. user supplied data inputs).
+    TODO: Currently only supports binary classification.
+        Add multinomial functions and unit tests.
+        Add functionality for regression(?)
+    Inputs:
+    params: dictionary of logistic regression input functions.
+        keys max_iter, tol, and C supported.
+    random_state: int/float for reproducible intiailization.
+    '''
+
+    # only binary so far
+    def __init__(self, params=None, random_state=0):
+        # defaults to a fixed value for reproducibility
+        self.random_state = random_state
+        # dictionary of parameters for logistic regression model
+        self.params = params
+        if self.params is None:
+            self.model1 = linear_model.LogisticRegression(
+                            random_state=self.random_state)
+            self.model2 = linear_model.LogisticRegression(
+                            random_state=self.random_state)
+            # default needed for training
+            self.params = {'n_samples': 1}
+        else:
+            self.model1 = linear_model.LogisticRegression(
+                            random_state=self.random_state,
+                            max_iter=params['max_iter'],
+                            tol=params['tol'],
+                            C=params['C']
+                        )
+            self.model2 = linear_model.LogisticRegression(
+                            random_state=self.random_state,
+                            max_iter=params['max_iter'],
+                            tol=params['tol'],
+                            C=params['C']
+                        )
+
+    def training_loop(self, slr1, slr2, L_lr1, L_lr2,
+                      Ly_lr1, Ly_lr2, U_lr, n_samples,
+                      testx=None, testy=None):
+        '''
+        Main training iteration for co-training.
+        Given two models, labeled training data, and unlabeled training data:
+        - Train both models using their respective labeled datasets
+        - Randomly sample n_samples number of unlabeled
+            instances for model 1 and 2 each.
+        - Label the sampled unlabeled instances using
+            model 1 (u1) and model 2 (u2).
+        - Remove u1 and u2 from the unlabeled dataset and
+            include in each model's respective labeled dataset
+            with their associated labels for future training.
+        Inputs:
+        slr1: logistic regression co-training model #1
+        slr2: logistic regression co-training model #2
+        L_lr1: feature training data for co-training model #1
+        L_lr2: feature training data for co-training model #2
+        Ly_lr1: labels for input data for co-training model #1
+        Ly_lr2: labels for input data for co-training model #2
+        U_lr: unlabeled feature training data used by both models
+        n_samples: the number of instances to sample and
+            predict from Ux at one time
+        testx: feature vector/matrix used for testing the performance
+            of each model at every iteration.
+        testy: label vector used for testing the performance
+            of each model at every iteration.
+        '''
+
+        model1_accs, model2_accs = np.array([]), np.array([])
+        # should stay false but if true,
+        # the same unalbeled instance could be sampled multiple times
+        rep = False
+        while U_lr.shape[0] > 1:
+            slr1.fit(L_lr1, Ly_lr1)
+            slr2.fit(L_lr2, Ly_lr2)
+
+            # pull u1
+            # ensuring there is enough instances to sample for each model
+            if U_lr.shape[0] < n_samples*2:
+                n_samples = int(U_lr.shape[0]/2)
+            uidx1 = np.random.choice(range(U_lr.shape[0]),
+                                     n_samples,
+                                     replace=rep)
+            u1 = U_lr[uidx1].copy()
+            # remove instances that will be labeled
+            U_lr = np.delete(U_lr, uidx1, axis=0)
+
+            # pull u2
+            uidx2 = np.random.choice(range(U_lr.shape[0]),
+                                     n_samples,
+                                     replace=rep)
+            u2 = U_lr[uidx2].copy()
+            # remove instances that will be labeled
+            U_lr = np.delete(U_lr, uidx2, axis=0)
+
+            # predict unlabeled samples
+            u1y = slr1.predict(u1)
+            u2y = slr2.predict(u2)
+
+            if testx is not None and testy is not None:
+                # test and save model(s) accuracy over all training iterations
+                model1_accs = np.append(model1_accs,
+                                        balanced_accuracy_score(testy,
+                                                                slr1.predict(
+                                                                    testx)))
+                model2_accs = np.append(model2_accs,
+                                        balanced_accuracy_score(testy,
+                                                                slr2.predict(
+                                                                    testx)))
+
+            # add predictions to cotrained model(s) labeled samples
+            L_lr1 = np.append(L_lr1, u2, axis=0)
+            L_lr2 = np.append(L_lr2, u1, axis=0)
+            Ly_lr1 = np.append(Ly_lr1, u2y, axis=0)
+            Ly_lr2 = np.append(Ly_lr2, u1y, axis=0)
+
+        return slr1, slr2, model1_accs, model2_accs
+
+    def fresh_start(self, params, data_dict):
+        '''
+        Required method for hyperopt optimization.
+        Trains and tests a fresh co-training model
+        with given input parameters.
+        This method does not overwrite self.model (self.optimize() does).
+        Inputs:
+        params: dictionary of logistic regression input functions.
+            keys n_samples, max_iter, tol, and C supported.
+        data_dict: compact data representation with the four requisite
+            data structures used for training and testing a model.
+            keys trainx, trainy, testx, testy, and Ux required.
+            NOTE: Uy is not needed since labels for unlabeled data
+            instances is not used.
+        '''
+
+        # unpack data
+        trainx = data_dict['trainx']
+        trainy = data_dict['trainy']
+        testx = data_dict['testx']
+        testy = data_dict['testy']
+        # unlabeled co-training data
+        Ux = data_dict['Ux']
+
+        clf = CoTraining(params=params, random_state=self.random_state)
+        # training and testing
+        model1_accs, model2_accs = clf.train(trainx, trainy, Ux, testx, testy)
+        # uses balanced_accuracy accounts for class imbalanced data
+        pred1, acc, pred2, model1_acc, model2_acc = clf.predict(testx, testy)
+
+        return {'loss': 1-acc,
+                'status': STATUS_OK,
+                'model': clf.model1,
+                'model2': clf.model2,
+                'model1_acc_history': model1_accs,
+                'model2_acc_history': model2_accs,
+                'params': params,
+                'accuracy': acc}
+
+    def optimize(self, space, data_dict, max_evals=50, verbose=True):
+        '''
+        Wrapper method for using hyperopt (see utils.run_hyperopt
+        for more details). After hyperparameter optimization, results
+        are stored, the best model -overwrites- self.model, and the
+        best params -overwrite- self.params.
+        Inputs:
+        space: a hyperopt compliant dictionary with defined optimization
+            spaces. For example:
+                # quniform returns float, some parameters require int;
+                # use this to force int
+                space = {'max_iter' : scope.int(hp.quniform('max_iter',
+                                                            10,
+                                                            10000,
+                                                            10)),
+                        'tol'       : hp.loguniform('tol', 1e-5, 1e-3),
+                        'C'         : hp.uniform('C', 1.0, 1000.0),
+                        'n_samples' : scope.int(hp.quniform('n_samples',
+                                                            1,
+                                                            20,
+                                                            1))
+                        }
+            See hyperopt docs for more information.
+        data_dict: compact data representation with the five requisite
+            data structures used for training and testing an SSML model.
+            keys trainx, trainy, testx, testy, and Ux required.
+            NOTE: Uy is not needed since labels for unlabeled data
+            instances is not used.
+        max_evals: the number of epochs for hyperparameter optimization.
+            Each iteration is one set of hyperparameters trained
+            and tested on a fresh model. Convergence for simpler
+            models like logistic regression typically happens well
+            before 50 epochs, but can increase as more complex models,
+            more hyperparameters, and a larger hyperparameter space is tested.
+        verbose: boolean. If true, print results of hyperopt.
+            If false, print only the progress bar for optimization.
+        '''
+
+        best, worst = run_hyperopt(space=space,
+                                   model=self.fresh_start,
+                                   data_dict=data_dict,
+                                   max_evals=max_evals,
+                                   verbose=verbose)
+
+        # save the results of hyperparameter optimization
+        self.best = best
+        self.model = best['model']
+        self.params = best['params']
+        self.worst = worst
+
+    def train(self, trainx, trainy, Ux,
+              testx=None, testy=None):
+        '''
+        Wrapper method for a basic co-training with logistic regression
+        implementation training method.
+        Inputs:
+        trainx: nxm feature vector/matrix for training model.
+        trainy: nxk class label vector/matrix for training model.
+        Ux: feature vector/matrix like labeled trainx but unlabeled data.
+        testx: feature vector/matrix used for testing the performance
+            of each model at every iteration.
+        testy: label vector used for testing the performance
+            of each model at every iteration.
+        '''
+
+        # avoid overwriting when deleting in co-training loop
+        U_lr = Ux.copy()
+
+        # set the random seed of training splits for reproducibility
+        # This can be ignored by excluding params['seed']
+        # in the hyperopt space dictionary
+        if 'seed' in self.params.keys():
+            np.random.seed(self.params['seed'])
+
+        # TODO: allow a user to specify uneven splits between the two models
+        split_frac = 0.5
+        # labeled training data
+        idx = np.random.choice(range(trainy.shape[0]),
+                               size=int(split_frac * trainy.shape[0]),
+                               replace=False)
+
+        # avoid overwriting when deleting in co-training loop
+        L_lr1 = trainx[idx].copy()
+        L_lr2 = trainx[~idx].copy()
+        Ly_lr1 = trainy[idx].copy()
+        Ly_lr2 = trainy[~idx].copy()
+
+        self.model1, self.model2, model1_accs, model2_accs = \
+            self.training_loop(
+                                self.model1, self.model2,
+                                L_lr1, L_lr2,
+                                Ly_lr1, Ly_lr2,
+                                U_lr, self.params['n_samples'],
+                                testx, testy,
+                                )
+
+        # optional returns if a user is interested in training diagnostics
+        return model1_accs, model2_accs
+
+    def predict(self, testx, testy=None):
+        '''
+        Wrapper method for sklearn's Label Propagation predict method.
+        Inputs:
+        testx: nxm feature vector/matrix for testing model.
+        testy: nxk class label vector/matrix for training model.
+            optional: if included, the predicted classes -and-
+            the resulting classification accuracy will be returned.
+        '''
+
+        pred1 = self.model1.predict(testx)
+        pred2 = self.model2.predict(testx)
+
+        acc = None
+        if testy is not None:
+            # balanced_accuracy accounts for class imbalanced data
+            # could alternatively use pure accuracy
+            # for a more traditional hyperopt
+            model1_acc = balanced_accuracy_score(testy, pred1)
+            model2_acc = balanced_accuracy_score(testy, pred2)
+            # select best accuracy for hyperparameter optimization
+            acc = max(model1_acc, model2_acc)
+
+        return pred1, acc, pred2, model1_acc, model2_acc
+
+    def plot_cotraining(self, model1_accs=None, model2_accs=None,
+                        filename='lr-cotraining-learningcurves.png'):
+        '''
+        Plots the training error curves for two co-training models.
+        NOTE: The user must provide the curves to plot, but each curve is
+            saved by the class under self.best and self.worst models.
+        Inputs:
+        filename: name to store picture under.
+            Must end in .png (or will be added if missing).
+        model1_accs: the accuracy scores over training epochs for model 1
+        model2_accs: the accuracy scores over training epochs for model 2
+        '''
+
+        fig, ax = plt.subplots(figsize=(10, 8), dpi=300)
+        ax.plot(np.arange(len(model1_accs)), model1_accs,
+                color='tab:blue', label='Model 1')
+        ax.plot(np.arange(len(model2_accs)), model2_accs,
+                color='tab:orange', label='Model 2')
+        ax.legend()
+        ax.set_xlabel('Co-Training Iteration')
+        ax.set_ylabel('Test Accuracy')
+        ax.grid()
+
+        if filename[-4:] != '.png':
+            filename += '.png'
+        fig.savefig(filename)
+
+    def save(self, filename):
+        '''
+        Save class instance to file using joblib.
+        Inputs:
+        filename: string filename to save object to file under.
+            The file must be saved with extension .joblib.
+            Added to filename if not included as input.
+        '''
+
+        if filename[-7:] != '.joblib':
+            filename += '.joblib'
+        joblib.dump(self, filename)
diff --git a/models/SSML/LabelProp.py b/models/SSML/LabelProp.py
new file mode 100644
index 0000000..cb9ff05
--- /dev/null
+++ b/models/SSML/LabelProp.py
@@ -0,0 +1,186 @@
+import numpy as np
+# For hyperopt (parameter optimization)
+from hyperopt import STATUS_OK
+# sklearn models
+from sklearn import semi_supervised
+# diagnostics
+from sklearn.metrics import balanced_accuracy_score
+from scripts.utils import run_hyperopt
+import joblib
+
+
+class LabelProp:
+    '''
+    Methods for deploying sklearn's Label Propagation
+    implementation with hyperparameter optimization.
+    Data agnostic (i.e. user supplied data inputs).
+    NOTE: Since LabelProp is guaranteed to converge given
+        enough iterations, there is no random_state defined.
+    TODO: Currently only supports binary classification.
+        Add multinomial functions and unit tests.
+        Add functionality for regression(?)
+    Inputs:
+    params: dictionary of logistic regression input functions.
+        keys gamma, n_neighbors, max_iter, and tol supported.
+    '''
+
+    # only binary so far
+    def __init__(self, params=None, random_state=0):
+        # defaults to a fixed value for reproducibility
+        self.random_state = random_state
+        # dictionary of parameters for logistic regression model
+        self.params = params
+        if self.params is None:
+            # defaults:
+            # knn kernel, although an rbf is equally valid
+            # TODO: allow rbf kernels
+            # n_jobs, use parallelization if available.
+            self.model = semi_supervised.LabelPropagation(
+                            kernel='knn',
+                            n_jobs=-1
+                        )
+        else:
+            self.model = semi_supervised.LabelPropagation(
+                            kernel='knn',
+                            gamma=params['gamma'],
+                            n_neighbors=params['n_neighbors'],
+                            max_iter=params['max_iter'],
+                            tol=params['tol'],
+                            n_jobs=-1
+                        )
+
+    def fresh_start(self, params, data_dict):
+        '''
+        Required method for hyperopt optimization.
+        Trains and tests a fresh Label Propagation model
+        with given input parameters.
+        This method does not overwrite self.model (self.optimize() does).
+        Inputs:
+        params: dictionary of logistic regression input functions.
+            keys max_iter, tol, and C supported.
+        data_dict: compact data representation with the five requisite
+            data structures used for training and testing an SSML model.
+            keys trainx, trainy, testx, testy, and Ux required.
+            NOTE: Uy is not needed since labels for unlabeled data
+            instances is not used.
+        '''
+
+        # unpack data
+        trainx = data_dict['trainx']
+        trainy = data_dict['trainy']
+        testx = data_dict['testx']
+        testy = data_dict['testy']
+        Ux = data_dict['Ux']
+
+        clf = LabelProp(params, random_state=self.random_state)
+        # training and testing
+        clf.train(trainx, trainy, Ux)
+        # uses balanced_accuracy accounts for class imbalanced data
+        pred, acc = clf.predict(testx, testy)
+
+        # loss function minimizes misclassification
+        return {'loss': 1-acc,
+                'status': STATUS_OK,
+                'model': clf.model,
+                'params': params,
+                'accuracy': acc}
+
+    def optimize(self, space, data_dict, max_evals=50, verbose=True):
+        '''
+        Wrapper method for using hyperopt (see utils.run_hyperopt
+        for more details). After hyperparameter optimization, results
+        are stored, the best model -overwrites- self.model, and the
+        best params -overwrite- self.params.
+        Inputs:
+        space: a hyperopt compliant dictionary with defined optimization
+            spaces. For example:
+                # quniform returns float, some parameters require int;
+                # use this to force int
+                space = {'max_iter'  : scope.int(hp.quniform('max_iter',
+                                                             10,
+                                                             10000,
+                                                             10)),
+                        'tol'        : hp.loguniform('tol', 1e-6, 1e-4),
+                        'gamma'      : hp.uniform('gamma', 1, 50),
+                        'n_neighbors': scope.int(hp.quniform('n_neighbors',
+                                                             1,
+                                                             200,
+                                                             1))
+                        }
+            See hyperopt docs for more information.
+        data_dict: compact data representation with the five requisite
+            data structures used for training and testing an SSML model.
+            keys trainx, trainy, testx, testy, and Ux required.
+            NOTE: Uy is not needed since labels for unlabeled data
+            instances is not used.
+        max_evals: the number of epochs for hyperparameter optimization.
+            Each iteration is one set of hyperparameters trained
+            and tested on a fresh model. Convergence for simpler
+            models like logistic regression typically happens well
+            before 50 epochs, but can increase as more complex models,
+            more hyperparameters, and a larger hyperparameter space is tested.
+        verbose: boolean. If true, print results of hyperopt.
+            If false, print only the progress bar for optimization.
+        '''
+
+        best, worst = run_hyperopt(space=space,
+                                   model=self.fresh_start,
+                                   data_dict=data_dict,
+                                   max_evals=max_evals,
+                                   verbose=verbose)
+
+        # save the results of hyperparameter optimization
+        self.best = best
+        self.model = best['model']
+        self.params = best['params']
+        self.worst = worst
+
+    def train(self, trainx, trainy, Ux):
+        '''
+        Wrapper method for sklearn's Label Propagation training method.
+        Inputs:
+        trainx: nxm feature vector/matrix for training model.
+        trainy: nxk class label vector/matrix for training model.
+        Ux: feature vector/matrix like labeled trainx but unlabeled data.
+        '''
+
+        # combine labeled and unlabeled instances for training
+        lp_trainx = np.append(trainx, Ux, axis=0)
+        lp_trainy = np.append(trainy,
+                              np.full(shape=(Ux.shape[0],), fill_value=-1),
+                              axis=0)
+
+        # semi-supervised Label Propagation
+        self.model.fit(lp_trainx, lp_trainy)
+
+    def predict(self, testx, testy=None):
+        '''
+        Wrapper method for sklearn's Label Propagation predict method.
+        Inputs:
+        testx: nxm feature vector/matrix for testing model.
+        testy: nxk class label vector/matrix for training model.
+            optional: if included, the predicted classes -and-
+            the resulting classification accuracy will be returned.
+        '''
+
+        pred = self.model.predict(testx)
+
+        acc = None
+        if testy is not None:
+            # uses balanced_accuracy_score to account for class imbalance
+            acc = balanced_accuracy_score(testy, pred)
+
+        return pred, acc
+
+    def save(self, filename):
+        '''
+        Save class instance to file using joblib.
+        Inputs:
+        filename: string filename to save object to file under.
+            The file must be saved with extension .joblib.
+            Added to filename if not included as input.
+        '''
+
+        if filename[-7:] != '.joblib':
+            filename += '.joblib'
+        joblib.dump(self, filename)
diff --git a/models/SSML/ShadowCNN.py b/models/SSML/ShadowCNN.py
new file mode 100644
index 0000000..a633283
--- /dev/null
+++ b/models/SSML/ShadowCNN.py
@@ -0,0 +1,436 @@
+import numpy as np
+import matplotlib.pyplot as plt
+# For hyperopt (parameter optimization)
+from hyperopt import STATUS_OK
+# torch imports
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import torch.nn.functional as F
+# shadow imports
+import shadow.eaat
+import shadow.losses
+import shadow.utils
+from shadow.utils import set_seed
+# diagnostics
+from scripts.utils import EarlyStopper, run_hyperopt
+import joblib
+
+
+class Net(nn.Module):
+    '''
+    Neural Network constructor.
+    Also includes method for forward pass.
+    nn.Module: PyTorch object for neural networks.
+    Inputs:
+    layer1: int length for first layer.
+    layer2: int length for second layer.
+        Ideally a multiple of layer1.
+    layer3: int length for third layer.
+        Ideally a multiple of layer2.
+    kernel: convolutional kernel size.
+        NOTE: An optimal value is unclear for spectral data.
+    drop_rate: float (<1.) probability for reset/dropout layer.
+    length: single instance data length.
+        NOTE: Assumed to be 1000 for spectral data.
+    TODO: Allow hyperopt to optimize on arbitrary sized networks.
+    '''
+
+    def __init__(self, layer1=32, layer2=64, layer3=128,
+                 kernel=3, drop_rate=0.1, length=1000):
+        '''
+        Defines the structure for each type of layer.
+        The resulting network has fixed length but the
+        user can input arbitrary widths.
+        '''
+
+        # default max_pool1d kernel set by Shadow MNIST example
+        # NOTE: max_pool1d sets mp_kernel = mp_stride
+        self.mp_kernel = 2
+        super(Net, self).__init__()
+        self.conv1 = nn.Conv1d(1, layer1, kernel, 1)
+        self.conv2 = nn.Conv1d(layer1, layer2, kernel, 1)
+        self.dropout = nn.Dropout(drop_rate)
+        # calculating the number of parameters/weights before the flattened
+        # fully-connected layer:
+        #   first, there are two convolution layers, so the output length is
+        #   the input length (feature_vector.shape[0] - 2_layers*(kernel-1))
+        #   if, in the future, more layers are desired, 2 must be adjusted
+        #   next, calculate the output of the max_pool1d layer, which is
+        #   round((conv_out - (kernel=stride - 1) - 1)/2 + 1)
+        #   finally, multiply this by the number of channels in the last
+        #   convolutional layer = layer2
+        conv_out = length-2*(kernel-1)
+        parameters = layer2*(
+                        ((conv_out - (self.mp_kernel - 1) - 1)//self.mp_kernel)
+                        + 1)
+        self.fc1 = nn.Linear(int(parameters), layer3)
+        self.fc2 = nn.Linear(layer3, 2)
+
+    def forward(self, x):
+        '''
+        The resulting network has a fixed length with
+        two convolutional layers divided by relu activation,
+        a max pooling layer, a dropout layer, and two
+        fully-connected layers separated by a relu and
+        dropout layers.
+        '''
+
+        x = self.conv1(x)
+        x = F.relu(x)
+        x = self.conv2(x)
+        x = F.max_pool1d(x, self.mp_kernel)
+        x = self.dropout(x)
+        x = torch.flatten(x, 1)
+        x = self.fc1(x)
+        x = F.relu(x)
+        x = self.dropout(x)
+        x = self.fc2(x)
+        return x
+
+
+class SpectralDataset(torch.utils.data.Dataset):
+    '''
+    Dataset loader for use with PyTorch NN training.
+    torch.utils.data.Dataset: managing user input data for random sampling.
+    Inputs:
+    trainD: the nxm input vector/matrix of data.
+    labels: associated label vector for data.
+    '''
+
+    def __init__(self, trainD, labels):
+        self.labels = labels
+        self.trainD = trainD
+
+    def __len__(self):
+        '''
+        Define what length is for the Dataset
+        '''
+
+        return len(self.labels)
+
+    def __getitem__(self, idx):
+        '''
+        Define how to retrieve an instance from a dataset.
+        Inputs:
+        idx: the index to sample from.
+        '''
+
+        label = self.labels[idx]
+        data = self.trainD[idx]
+        # no need to bother with labels, unpacking both anyways
+        # sample = {"Spectrum": data, "Class": label}
+        # return sample
+        return data, label
+
+
+class ShadowCNN:
+    '''
+    Methods for deploying a Shadow CNN
+    implementation with hyperparameter optimization.
+    Data agnostic (i.e. user supplied data inputs).
+    TODO: Currently only supports binary classification.
+        Add multinomial functions and unit tests.
+        Add functionality for regression(?)
+    Inputs:
+    params: dictionary of logistic regression input functions.
+        keys binning, hidden_layer, alpha, xi, eps, lr, and momentum
+        are supported.
+    TODO: Include functionality for manipulating other
+        CNN architecture parameters in hyperparameter optimization
+    random_state: int/float for reproducible intiailization.
+    length: int input length (i.e. dimensions of feature vectors)
+    TODO: Add input parameter, loss_function, for the other
+        loss function options available in Shadow (besides EAAT).
+    '''
+
+    # only binary so far
+    def __init__(self, params=None, random_state=0, length=1000):
+        # defaults to a fixed value for reproducibility
+        self.random_state = random_state
+        # set seeds for reproducibility
+        set_seed(0)
+        # device used for computation
+        self.device = torch.device("cuda" if
+                                   torch.cuda.is_available() else "cpu")
+        # dictionary of parameters for logistic regression model
+        self.params = params
+        if self.params is not None:
+            # assumes the input dimensions are measurements of 1000 bins
+            # TODO: Abstract this for arbitrary input size
+            self.model = Net(layer1=params['layer1'],
+                             layer2=2*params['layer1'],
+                             layer3=3*params['layer1'],
+                             kernel=params['kernel'],
+                             drop_rate=params['drop_rate'],
+                             length=np.ceil(length/params['binning']))
+            self.eaat = shadow.eaat.EAAT(model=self.model,
+                                         alpha=params['alpha'],
+                                         xi=params['xi'],
+                                         eps=params['eps'])
+            self.optimizer = optim.SGD(self.eaat.parameters(),
+                                       lr=params['lr'],
+                                       momentum=params['momentum'])
+        else:
+            # fixed value defaults needed by training algorithm
+            self.params = {'binning': 1, 'batch_size': 1}
+            # assumes the input dimensions are measurements of 1000 bins
+            # TODO: Abstract this for arbitrary input size
+            self.model = Net()
+            self.eaat = shadow.eaat.EAAT(model=self.model)
+            self.optimizer = optim.SGD(self.eaat.parameters(),
+                                       lr=0.1, momentum=0.9)
+
+    def fresh_start(self, params, data_dict):
+        '''
+        Required method for hyperopt optimization.
+        Trains and tests a fresh Shadow NN model
+        with given input parameters.
+        This method does not overwrite self.model (self.optimize() does).
+        Inputs:
+        params: dictionary of logistic regression input functions.
+            keys binning, layer1, alpha, xi, eps, lr, momentum,
+            kernel, drop_rate, and batch_size are supported.
+        data_dict: compact data representation with the four requisite
+            data structures used for training and testing a model.
+            keys trainx, trainy, testx, testy, and Ux required.
+            NOTE: Uy is not needed since labels for unlabeled data
+            instances is not used.
+        '''
+
+        self.params = params
+        # unpack data
+        trainx = data_dict['trainx']
+        trainy = data_dict['trainy']
+        testx = data_dict['testx']
+        testy = data_dict['testy']
+        # unlabeled co-training data
+        Ux = data_dict['Ux']
+
+        clf = ShadowCNN(params=params,
+                        random_state=self.random_state,
+                        length=trainx.shape[1])
+        # training and testing
+        losscurve, evalcurve = clf.train(trainx, trainy, Ux, testx, testy)
+        # not used; max acc in past few epochs used instead
+        y_pred, acc = clf.predict(testx, testy)
+        max_acc = np.max(evalcurve[-25:])
+
+        return {'loss': 1-(max_acc/100.0),
+                'status': STATUS_OK,
+                'model': clf.eaat,
+                'params': params,
+                'losscurve': losscurve,
+                'evalcurve': evalcurve,
+                'accuracy': (max_acc/100.0)}
+
+    def optimize(self, space, data_dict, max_evals=50, verbose=True):
+        '''
+        Wrapper method for using hyperopt (see utils.run_hyperopt
+        for more details). After hyperparameter optimization, results
+        are stored, the best model -overwrites- self.model, and the
+        best params -overwrite- self.params.
+        Inputs:
+        space: a hyperopt compliant dictionary with defined optimization
+            spaces. For example:
+                # quniform returns float, some parameters require int;
+                # use this to force int
+                space = {'layer1'        : scope.int(hp.quniform('layer1',
+                                                                 1000,
+                                                                 10000,
+                                                                 10)),
+                         'kernel'        : scope.int(hp.quniform('kernel',
+                                                                 1,
+                                                                 9,
+                                                                 1)),
+                         'alpha'        : hp.uniform('alpha', 0.0001, 0.999),
+                         'xi'           : hp.uniform('xi', 1e-2, 1e0),
+                         'eps'          : hp.uniform('eps', 0.5, 1.5),
+                         'lr'           : hp.uniform('lr', 1e-3, 1e-1),
+                         'momentum'     : hp.uniform('momentum', 0.5, 0.99),
+                         'binning'      : scope.int(hp.quniform('binning',
+                                                                1,
+                                                                10,
+                                                                1)),
+                         'batch_size'   : scope.int(hp.quniform('batch_size',
+                                                                1,
+                                                                100,
+                                                                1))
+                        }
+            See hyperopt docs for more information.
+        data_dict: compact data representation with the five requisite
+            data structures used for training and testing an SSML model.
+            keys trainx, trainy, testx, testy, and Ux required.
+            NOTE: Uy is not needed since labels for unlabeled data
+            instances is not used.
+        max_evals: the number of epochs for hyperparameter optimization.
+            Each iteration is one set of hyperparameters trained
+            and tested on a fresh model. Convergence for simpler
+            models like logistic regression typically happens well
+            before 50 epochs, but can increase as more complex models,
+            more hyperparameters, and a larger hyperparameter space is tested.
+        verbose: boolean. If true, print results of hyperopt.
+            If false, print only the progress bar for optimization.
+        '''
+
+        best, worst = run_hyperopt(space=space,
+                                   model=self.fresh_start,
+                                   data_dict=data_dict,
+                                   max_evals=max_evals,
+                                   verbose=verbose)
+
+        # save the results of hyperparameter optimization
+        self.best = best
+        self.model = best['model']
+        self.params = best['params']
+        self.worst = worst
+
+    def train(self, trainx, trainy, Ux, testx=None, testy=None):
+        '''
+        Wrapper method for Shadow NN training method.
+        Inputs:
+        trainx: nxm feature vector/matrix for training model.
+        trainy: nxk class label vector/matrix for training model.
+        Ux: feature vector/matrix like labeled trainx but unlabeled data.
+        testx: feature vector/matrix used for testing the performance
+            of each model at every iteration.
+        testy: label vector used for testing the performance
+            of each model at every iteration.
+        '''
+
+        # avoid float round-off by using DoubleTensor
+        xtens = torch.FloatTensor(np.append(trainx,
+                                            Ux,
+                                            axis=0))[:,
+                                                     ::self.params['binning']]
+        # xtens[xtens == 0.0] = torch.unique(xtens)[1]/1e10
+        ytens = torch.LongTensor(np.append(trainy,
+                                           np.full(shape=(Ux.shape[0],),
+                                                   fill_value=-1),
+                                           axis=0))
+
+        # define data set object
+        dataset = SpectralDataset(xtens, ytens)
+
+        # create DataLoader object of DataSet object
+        DL_DS = torch.utils.data.DataLoader(dataset,
+                                            batch_size=self.params[
+                                                        'batch_size'
+                                                        ],
+                                            shuffle=True)
+
+        # labels for unlabeled data are always "-1"
+        xEnt = torch.nn.CrossEntropyLoss(ignore_index=-1)
+
+        # generate early-stopping watchdog
+        # TODO: allow a user of ShadowCNN to specify EarlyStopper's params
+        stopper = EarlyStopper(patience=3, min_delta=0)
+        n_epochs = 100
+        self.eaat.to(self.device)
+        losscurve = []
+        evalcurve = []
+        for epoch in range(n_epochs):
+            self.eaat.train()
+            lossavg = []
+            for i, (data, targets) in enumerate(DL_DS):
+                x = data.reshape((data.shape[0],
+                                  1,
+                                  data.shape[1])).to(self.device)
+                y = targets.to(self.device)
+                self.optimizer.zero_grad()
+                out = self.eaat(x)
+                loss = xEnt(out, y) + self.eaat.get_technique_cost(x)
+                loss.backward()
+                self.optimizer.step()
+                lossavg.append(loss.item())
+            losscurve.append(np.nanmedian(lossavg))
+            if testx is not None and testy is not None:
+                pred, acc = self.predict(testx, testy)
+                evalcurve.append(acc)
+
+                self.eaat.train()
+                # test for early stopping
+                x_val = torch.FloatTensor(
+                                    testx.copy()[:, ::self.params['binning']])
+                x_val = x_val.reshape((x_val.shape[0],
+                                       1,
+                                       x_val.shape[1])).to(self.device)
+                y_val = torch.LongTensor(testy).to(self.device)
+                out = self.eaat(x_val)
+                val_loss = xEnt(out, y_val) + \
+                    self.eaat.get_technique_cost(x_val)
+                if stopper.early_stop(val_loss):
+                    break
+
+        # optionally return the training accuracy if test data was provided
+        return losscurve, evalcurve
+
+    def predict(self, testx, testy=None):
+        '''
+        Wrapper method for Shadow NN predict method.
+        Inputs:
+        testx: nxm feature vector/matrix for testing model.
+        testy: nxk class label vector/matrix for training model.
+            optional: if included, the predicted classes -and-
+            the resulting classification accuracy will be returned.
+        binning: int number of bins sampled in feature vector
+        '''
+
+        self.eaat.eval()
+        y_pred, y_true = [], []
+        for i, data in enumerate(torch.FloatTensor(
+                                    testx.copy()[:, ::self.params['binning']])
+                                 ):
+            x = data.reshape((1, 1, data.shape[0])).to(self.device)
+            out = self.eaat(x)
+            y_pred.extend(torch.argmax(out, 1).detach().cpu().tolist())
+        acc = None
+        if testy is not None:
+            y_true = torch.LongTensor(testy.copy())
+            acc = (np.array(y_true) == np.array(y_pred)).mean() * 100
+
+        return y_pred, acc
+
+    def plot_training(self, losscurve=None, evalcurve=None,
+                      filename='lr-cotraining-learningcurves.png'):
+        '''
+        Plots the training error curves for two co-training models.
+        NOTE: The user must provide the curves to plot, but each curve is
+            saved by the class under self.best and self.worst models.
+        Inputs:
+        filename: name to store picture under.
+            Must end in .png (or will be added if missing).
+        losscurve: the loss value over training epochs
+        evalcurve: the accuracy scores over training epochs
+        '''
+
+        fig, (ax1, ax2) = plt.subplots(2,
+                                       1,
+                                       sharex=True,
+                                       figsize=(10, 8),
+                                       dpi=300)
+        ax1.plot(losscurve)
+        ax2.plot(evalcurve)
+        ax1.set_xlabel('Epoch')
+        ax2.set_xlabel('Epoch')
+        ax1.set_ylabel('Loss Curve')
+        ax2.set_ylabel('Accuracy')
+        ax1.grid()
+        ax2.grid()
+
+        if filename[-4:] != '.png':
+            filename += '.png'
+        fig.savefig(filename)
+
+    def save(self, filename):
+        '''
+        Save class instance to file using joblib.
+        Inputs:
+        filename: string filename to save object to file under.
+            The file must be saved with extension .joblib.
+            Added to filename if not included as input.
+        '''
+
+        if filename[-7:] != '.joblib':
+            filename += '.joblib'
+        joblib.dump(self, filename)
diff --git a/models/SSML/ShadowNN.py b/models/SSML/ShadowNN.py
new file mode 100644
index 0000000..4857ccf
--- /dev/null
+++ b/models/SSML/ShadowNN.py
@@ -0,0 +1,282 @@
+import numpy as np
+# For hyperopt (parameter optimization)
+from hyperopt import STATUS_OK
+# torch imports
+import torch
+# shadow imports
+import shadow.eaat
+import shadow.losses
+import shadow.utils
+from shadow.utils import set_seed
+# diagnostics
+from scripts.utils import EarlyStopper, run_hyperopt
+import joblib
+
+
+class ShadowNN:
+    '''
+    Methods for deploying a Shadow fully-connected NN
+    implementation with hyperparameter optimization.
+    Data agnostic (i.e. user supplied data inputs).
+    TODO: Currently only supports binary classification.
+        Add multinomial functions and unit tests.
+        Add functionality for regression(?)
+    Inputs:
+    params: dictionary of logistic regression input functions.
+        keys binning, hidden_layer, alpha, xi, eps, lr, and momentum
+        are supported.
+    random_state: int/float for reproducible intiailization.
+    TODO: Add input parameter, loss_function, for the other
+        loss function options available in Shadow (besides EAAT).
+    '''
+
+    # only binary so far
+    def __init__(self, params=None, random_state=0, input_length=1000):
+        # defaults to a fixed value for reproducibility
+        self.random_state = random_state
+        self.input_length = input_length
+        # set seeds for reproducibility
+        set_seed(0)
+        # device used for computation
+        self.device = torch.device("cuda" if
+                                   torch.cuda.is_available() else "cpu")
+        # dictionary of parameters for logistic regression model
+        self.params = params
+        if self.params is not None:
+            # assumes the input dimensions are measurements of 1000 bins
+            # TODO: Abstract this for arbitrary input size
+            self.eaat = shadow.eaat.EAAT(model=self.model_factory(
+                                            int(np.ceil(
+                                                self.input_length /
+                                                params['binning'])),
+                                            params['hidden_layer']),
+                                         alpha=params['alpha'],
+                                         xi=params['xi'],
+                                         eps=params['eps']).to(self.device)
+            self.eaat_opt = torch.optim.SGD(self.eaat.parameters(),
+                                            lr=params['lr'],
+                                            momentum=params['momentum'])
+            # unlabeled instances always have a label of "-1"
+            self.xEnt = torch.nn.CrossEntropyLoss(
+                            ignore_index=-1).to(self.device)
+        else:
+            self.params = {'binning': 1}
+            # assumes the input dimensions are measurements of 1000 bins
+            self.eaat = shadow.eaat.EAAT(
+                            model=self.model_factory()).to(self.device)
+            self.eaat_opt = torch.optim.SGD(self.eaat.parameters(),
+                                            lr=0.1, momentum=0.9)
+            # unlabeled instances always have a label of "-1"
+            self.xEnt = torch.nn.CrossEntropyLoss(
+                            ignore_index=-1).to(self.device)
+
+    def model_factory(self, length=1000, hidden_layer=10000):
+        return torch.nn.Sequential(
+            torch.nn.Linear(length, hidden_layer),
+            torch.nn.ReLU(),
+            torch.nn.Linear(hidden_layer, length),
+            torch.nn.ReLU(),
+            torch.nn.Linear(length, 2)
+        )
+
+    def fresh_start(self, params, data_dict):
+        '''
+        Required method for hyperopt optimization.
+        Trains and tests a fresh Shadow NN model
+        with given input parameters.
+        This method does not overwrite self.model (self.optimize() does).
+        Inputs:
+        params: dictionary of logistic regression input functions.
+            keys binning, hidden_layer, alpha, xi, eps, lr, and momentum
+            are supported.
+        data_dict: compact data representation with the four requisite
+            data structures used for training and testing a model.
+            keys trainx, trainy, testx, testy, and Ux required.
+            NOTE: Uy is not needed since labels for unlabeled data
+            instances is not used.
+        '''
+
+        # unpack data
+        trainx = data_dict['trainx']
+        trainy = data_dict['trainy']
+        testx = data_dict['testx']
+        testy = data_dict['testy']
+        # unlabeled co-training data
+        Ux = data_dict['Ux']
+
+        clf = ShadowNN(params=params,
+                       random_state=self.random_state,
+                       input_length=testx.shape[1])
+        # training and testing
+        acc_history = clf.train(trainx, trainy, Ux, testx, testy)
+        # not used; max acc in past few epochs used instead
+        eaat_pred, acc = clf.predict(testx, testy)
+        max_acc = np.max(acc_history[-20:])
+
+        return {'loss': 1-(max_acc/100.0),
+                'status': STATUS_OK,
+                'model': clf.eaat,
+                'params': params,
+                'accuracy': (max_acc/100.0)}
+
+    def optimize(self, space, data_dict, max_evals=50, verbose=True):
+        '''
+        Wrapper method for using hyperopt (see utils.run_hyperopt
+        for more details). After hyperparameter optimization, results
+        are stored, the best model -overwrites- self.model, and the
+        best params -overwrite- self.params.
+        Inputs:
+        space: a hyperopt compliant dictionary with defined optimization
+            spaces. For example:
+                # quniform returns float, some parameters require int;
+                # use this to force int
+                space = {'hidden_layer' : scope.int(hp.quniform('hidden_layer',
+                                                        1000,
+                                                        10000,
+                                                        10)),
+                         'alpha'        : hp.uniform('alpha', 0.0001, 0.999),
+                         'xi'           : hp.uniform('xi', 1e-2, 1e0),
+                         'eps'          : hp.uniform('eps', 0.5, 1.5),
+                         'lr'           : hp.uniform('lr', 1e-3, 1e-1),
+                         'momentum'     : hp.uniform('momentum', 0.5, 0.99),
+                         'binning'      : scope.int(hp.quniform('binning',
+                                                                1,
+                                                                10,
+                                                                1))
+                        }
+            See hyperopt docs for more information.
+        data_dict: compact data representation with the five requisite
+            data structures used for training and testing an SSML model.
+            keys trainx, trainy, testx, testy, and Ux required.
+            NOTE: Uy is not needed since labels for unlabeled data
+            instances is not used.
+        max_evals: the number of epochs for hyperparameter optimization.
+            Each iteration is one set of hyperparameters trained
+            and tested on a fresh model. Convergence for simpler
+            models like logistic regression typically happens well
+            before 50 epochs, but can increase as more complex models,
+            more hyperparameters, and a larger hyperparameter space is tested.
+        verbose: boolean. If true, print results of hyperopt.
+            If false, print only the progress bar for optimization.
+        '''
+
+        best, worst = run_hyperopt(space=space,
+                                   model=self.fresh_start,
+                                   data_dict=data_dict,
+                                   max_evals=max_evals,
+                                   verbose=verbose)
+
+        # save the results of hyperparameter optimization
+        self.best = best
+        self.model = best['model']
+        self.params = best['params']
+        self.worst = worst
+
+    def train(self, trainx, trainy, Ux, testx=None, testy=None):
+        '''
+        Wrapper method for Shadow NN training method.
+        Inputs:
+        trainx: nxm feature vector/matrix for training model.
+        trainy: nxk class label vector/matrix for training model.
+        Ux: feature vector/matrix like labeled trainx but unlabeled data.
+        testx: feature vector/matrix used for testing the performance
+            of each model at every iteration.
+        testy: label vector used for testing the performance
+            of each model at every iteration.
+        '''
+
+        # avoid float round-off by using DoubleTensor
+        xtens = torch.FloatTensor(np.append(trainx,
+                                            Ux,
+                                            axis=0)[:,
+                                                    ::self.params['binning']])
+        # xtens[xtens == 0.0] = torch.unique(xtens)[1]/1e10
+        ytens = torch.LongTensor(np.append(trainy,
+                                           np.full(shape=(Ux.shape[0],),
+                                                   fill_value=-1),
+                                           axis=0))
+
+        n_epochs = 100
+        xt = torch.Tensor(xtens).to(self.device)
+        yt = torch.LongTensor(ytens).to(self.device)
+        # generate early-stopping watchdog
+        # TODO: allow a user of ShadowCNN to specify EarlyStopper's params
+        stopper = EarlyStopper(patience=3, min_delta=0)
+        # saves history for max accuracy
+        acc_history = []
+        for epoch in range(n_epochs):
+            # set the model into training mode
+            # NOTE: change this to .eval() mode for testing and back again
+            self.eaat.train()
+            # Forward/backward pass for training semi-supervised model
+            out = self.eaat(xt)
+            # supervised + unsupervised loss
+            loss = self.xEnt(out, yt) + self.eaat.get_technique_cost(xt)
+            self.eaat_opt.zero_grad()
+            loss.backward()
+            self.eaat_opt.step()
+
+            if testx is not None and testy is not None:
+                x_val = torch.FloatTensor(
+                            testx.copy()
+                        )[:, ::self.params['binning']].to(self.device)
+                y_val = torch.LongTensor(testy.copy()).to(self.device)
+
+                self.eaat.eval()
+                eaat_pred = torch.max(self.eaat(x_val), 1)[-1]
+                acc = shadow.losses.accuracy(eaat_pred,
+                                             y_val
+                                             ).data.item()
+                acc_history.append(acc)
+
+                self.eaat.train()
+                # test for early stopping
+                out = self.eaat(x_val)
+                val_loss = self.xEnt(out, y_val) + \
+                    self.eaat.get_technique_cost(x_val)
+                if stopper.early_stop(val_loss):
+                    break
+
+        # optionally return the training accuracy if test data was provided
+        return acc_history
+
+    def predict(self, testx, testy=None):
+        '''
+        Wrapper method for Shadow NN predict method.
+        Inputs:
+        testx: nxm feature vector/matrix for testing model.
+        testy: nxk class label vector/matrix for training model.
+            optional: if included, the predicted classes -and-
+            the resulting classification accuracy will be returned.
+        '''
+
+        self.eaat.eval()
+        eaat_pred = torch.max(self.eaat(
+                                torch.FloatTensor(
+                                    testx.copy()[:, ::self.params['binning']]
+                                    ).to(self.device)
+                                ), 1)[-1]
+
+        acc = None
+        if testy is not None:
+            acc = shadow.losses.accuracy(eaat_pred,
+                                         torch.LongTensor(
+                                            testy.copy()).to(self.device)
+                                         ).data.item()
+
+        # return tensor to cpu if on gpu and convert to numpy for return
+        eaat_pred = eaat_pred.cpu().numpy()
+        return eaat_pred, acc
+
+    def save(self, filename):
+        '''
+        Save class instance to file using joblib.
+        Inputs:
+        filename: string filename to save object to file under.
+            The file must be saved with extension .joblib.
+            Added to filename if not included as input.
+        '''
+
+        if filename[-7:] != '.joblib':
+            filename += '.joblib'
+        joblib.dump(self, filename)
diff --git a/models/SSML/__init__.py b/models/SSML/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/__init__.py b/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/requirements.txt b/requirements.txt
index 06d1c3a..8b22315 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,10 @@ numpy
 h5py
 progressbar2
 scipy>=1.7.0
+scikit-learn
+hyperopt
+matplotlib
+seaborn
+joblib
+torch
+shadow-ssml
diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/utils.py b/scripts/utils.py
new file mode 100644
index 0000000..d91c826
--- /dev/null
+++ b/scripts/utils.py
@@ -0,0 +1,328 @@
+import numpy as np
+import seaborn as sns
+import matplotlib.pyplot as plt
+# For hyperopt (parameter optimization)
+from hyperopt import Trials, tpe, fmin
+from functools import partial
+# diagnostics
+from sklearn.metrics import confusion_matrix
+# pca
+from sklearn.preprocessing import StandardScaler
+from sklearn.decomposition import PCA
+# Cross Validation
+from sklearn.model_selection import KFold, StratifiedKFold
+
+
+class EarlyStopper:
+    '''
+    Early stopping mechanism for neural networks.
+    Code adapted from user "isle_of_gods" from StackOverflow:
+    https://stackoverflow.com/questions/71998978/early-stopping-in-pytorch
+    Use this class to break a training loop if the validation loss is low.
+    Inputs:
+    patience: integer; forces stop if validation loss has not improved
+        for some time
+    min_delta: "fudge value" for how much loss to tolerate before stopping
+    '''
+
+    def __init__(self, patience=1, min_delta=0):
+        self.patience = patience
+        self.min_delta = min_delta
+        self.counter = 0
+        self.min_validation_loss = np.inf
+
+    def early_stop(self, validation_loss):
+        '''
+        Tests for the early stopping condition if the validation loss
+        has not improved for a certain period of time (patience).
+        Inputs:
+        validation_loss: typically a float value for the loss function of
+            a neural network training loop
+        '''
+
+        if validation_loss < self.min_validation_loss:
+            # keep track of the smallest validation loss
+            # if it has been beaten, restart patience
+            self.min_validation_loss = validation_loss
+            self.counter = 0
+        elif validation_loss > (self.min_validation_loss + self.min_delta):
+            # keep track of whether validation loss has been decreasing
+            # by a tolerable amount
+            self.counter += 1
+            if self.counter >= self.patience:
+                return True
+        return False
+
+
+def run_hyperopt(space, model, data_dict, max_evals=50, verbose=True):
+    '''
+    Runs hyperparameter optimization on a model given a parameter space.
+    Inputs:
+    space: dictionary with each hyperparameter as keys and values being the
+        range of parameter space (see hyperopt docs for defining a space)
+    mode: function that takes params dictionary, trains a specified ML model
+        and returns the optimization loss function, model, and other
+        attributes (e.g. accuracy on evaluation set)
+    max_eval: (int) run hyperparameter optimization for max_val iterations
+    verbose: report best and worse loss/accuracy
+
+    Returns:
+    best: dictionary with returns from model function, including best loss,
+        best trained model, best parameters, etc.
+    worst: dictionary with returns from model function, including worst loss,
+        worst trained model, worst parameters, etc.
+    '''
+
+    trials = Trials()
+
+    # wrap data into objective function
+    fmin_objective = partial(model, data_dict=data_dict)
+
+    # run hyperopt
+    fmin(fmin_objective,
+         space,
+         algo=tpe.suggest,
+         max_evals=max_evals,
+         trials=trials)
+
+    # of all trials, find best and worst loss/accuracy from optimization
+    best = trials.results[np.argmin([r['loss'] for r in trials.results])]
+    worst = trials.results[np.argmax([r['loss'] for r in trials.results])]
+
+    if verbose:
+        print('best accuracy:', 1-best['loss'])
+        print('best params:', best['params'])
+        print('worst accuracy:', 1-worst['loss'])
+        print('worst params:', worst['params'])
+
+    return best, worst
+
+
+def cross_validation(model, X, y, params, n_splits=3,
+                     stratified=False, random_state=None):
+    '''
+    Perform K-Fold cross validation using sklearn and a given model.
+    The model *must* have a fresh_start method (see models in RadClass/models).
+    fresh_start() is used instead of train() to be agnostic to the data needed
+        for training (fresh_start requires a data_dict whereas each model's
+        train could take different combinations of labeled & unlabeled data).
+        This also avoids the need to do hyperparameter optimization (and
+        therefore many training epochs) for every K-Fold.
+    NOTE: fresh_start returns the model and results in a dictionary but
+        does not overwrite/save the model to the respective class.
+        You can manually overwrite using model.model = return.model
+    Hyperparameter optimization (model.optimize) can be done before or after
+        cross validation to specify the (optimal) parameters used by the model
+        since they are required here.
+    NOTE: Fixed default to shuffle data during cross validation splits.
+        (See sklearn cross validation docs for more info.)
+    NOTE: Unlabeled data, if provided, will always be included in the training
+        dataset. This means that this cross validation implementation is
+        susceptible to bias in the unlabeled data distribution. To test for
+        this bias, a user can manually run cross validation as a parent to
+        calling this function, splitting the unlabeled data and adding
+        different folds into X.
+    Inputs:
+    model: ML model class object (e.g. RadClass/models).
+        Must have a fresh_start() method.
+        NOTE: If the model expects unlabeled data but unlabed data is not
+        provided in X/y, an error will likely be thrown when training the model
+        through fresh_start.
+    X: array of feature vectors (rows of individual instances, cols of vectors)
+        This should include all data for training and testing (since the
+        testing subset will be split by cross validation), including unlabeled
+        data if needed/used.
+    y: array/vector of labels for X. If including unlabeled data, use -1.
+        This should have the same order as X. That is, each row index in X
+        has an associated label with the same index in y.
+    params: dictionary of hyperparameters. Will depend on model used.
+        Alternatively, use model.params for models in RadClass/models
+    n_splits: int number of splits for K-Fold cross validation
+    stratified: bool; if True, balance the K-Folds to have roughly the same
+        proportion of samples from each class.
+    random_state: seed for reproducility.
+    '''
+
+    # return lists
+    accs = []
+    reports = []
+
+    if stratified:
+        cv = StratifiedKFold(n_splits=n_splits, random_state=random_state,
+                             shuffle=True)
+    else:
+        cv = KFold(n_splits=n_splits, random_state=random_state,
+                   shuffle=True)
+
+    # separate unlabeled data if included
+    Ux = None
+    Uy = None
+    if -1 in y:
+        U_idx = np.where(y == -1)[0]
+        L_idx = np.where(y != -1)[0]
+        Ux = X[U_idx]
+        Uy = y[U_idx]
+        Lx = X[L_idx]
+        Ly = y[L_idx]
+    else:
+        Lx = X
+        Ly = y
+    # conduct K-Fold cross validation
+    cv.get_n_splits(Lx, Ly)
+    for train_idx, test_idx in cv.split(Lx, Ly):
+        trainx, testx = Lx[train_idx], Lx[test_idx]
+        trainy, testy = Ly[train_idx], Ly[test_idx]
+
+        # construct data dictionary for training in fresh_start
+        data_dict = {'trainx': trainx, 'trainy': trainy,
+                     'testx': testx, 'testy': testy}
+        if Ux is not None:
+            data_dict['Ux'] = Ux
+            data_dict['Uy'] = Uy
+        results = model.fresh_start(params, data_dict)
+        accs = np.append(accs, results['accuracy'])
+        reports = np.append(reports, results)
+
+    # report cross validation results
+    print('Average accuracy:', np.mean(accs))
+    print('Max accuracy:', np.max(accs))
+    print('All accuracy:', accs)
+    # return the results of fresh_start for the max accuracy model
+    return reports[np.argmax(accs)]
+
+
+def pca(Lx, Ly, Ux, Uy, filename):
+    '''
+    A function for computing and plotting 2D PCA.
+    Inputs:
+    Lx: labeled feature data.
+    Ly: class labels for labeled data.
+    Ux: unlabeled feature data.
+    Uy: labels for unlabeled data (all labels should be -1).
+    filename: filename for saved plot.
+        The file must be saved with extension .joblib.
+        Added to filename if not included as input.
+    '''
+
+    plt.rcParams.update({'font.size': 20})
+    # only saving colors for binary classification with unlabeled instances
+    col_dict = {-1: 'tab:gray', 0: 'tab:orange', 1: 'tab:blue'}
+
+    pcadata = np.append(Lx, Ux, axis=0)
+    normalizer = StandardScaler()
+    x = normalizer.fit_transform(pcadata)
+    print(np.mean(pcadata), np.std(pcadata))
+    print(np.mean(x), np.std(x))
+
+    pca = PCA(n_components=2)
+    pca.fit_transform(x)
+    print(pca.explained_variance_ratio_)
+    print(pca.singular_values_)
+    print(pca.components_)
+
+    principalComponents = pca.fit_transform(x)
+
+    fig, ax = plt.subplots(figsize=(10, 8))
+    ax.set_xlabel('Principal Component 1', fontsize=15)
+    ax.set_ylabel('Principal Component 2', fontsize=15)
+    for idx, color in col_dict.items():
+        indices = np.where(np.append(Ly, Uy, axis=0) == idx)[0]
+        ax.scatter(principalComponents[indices, 0],
+                   principalComponents[indices, 1],
+                   c=color,
+                   label='class '+str(idx))
+    ax.grid()
+    ax.legend()
+
+    if filename[-4:] != '.png':
+        filename += '.png'
+    fig.tight_layout()
+    fig.savefig(filename)
+
+
+def multiD_pca(Lx, Ly, Ux, Uy, filename, n=2):
+    '''
+    A function for computing and plotting n-dimensional PCA.
+    Inputs:
+    Lx: labeled feature data.
+    Ly: class labels for labeled data.
+    Ux: unlabeled feature data.
+    Uy: labels for unlabeled data (all labels should be -1).
+    filename: filename for saved plot.
+        The file must be saved with extension .joblib.
+        Added to filename if not included as input.
+    n: number of singular values to include in PCA analysis.
+    '''
+
+    plt.rcParams.update({'font.size': 20})
+    # only saving colors for binary classification with unlabeled instances
+    col_dict = {-1: 'tab:gray', 0: 'tab:orange', 1: 'tab:blue'}
+
+    pcadata = np.append(Lx, Ux, axis=0)
+    normalizer = StandardScaler()
+    x = normalizer.fit_transform(pcadata)
+    print(np.mean(pcadata), np.std(pcadata))
+    print(np.mean(x), np.std(x))
+
+    n = 2
+    pca = PCA(n_components=n)
+    principalComponents = pca.fit_transform(x)
+    print(pca.explained_variance_ratio_)
+    print(pca.singular_values_)
+    print(pca.components_)
+
+    alph = ["A", "B", "C", "D", "E", "F", "G", "H",
+            "I", "J", "K", "L", "M", "N", "O", "P",
+            "Q", "R", "S", "T", "U", "V", "W", "X",
+            "Y", "Z"]
+    jobs = alph[:n]
+
+    fig, axes = plt.subplots(n, n, figsize=(15, 15))
+
+    for row in range(axes.shape[0]):
+        for col in range(axes.shape[1]):
+            ax = axes[row, col]
+            if row == col:
+                ax.tick_params(
+                    axis='both', which='both',
+                    bottom='off', top='off',
+                    labelbottom='off',
+                    left='off', right='off',
+                    labelleft='off'
+                )
+                ax.text(0.5, 0.5, jobs[row], horizontalalignment='center')
+            else:
+                for idx, color in col_dict.items():
+                    indices = np.where(np.append(Ly, Uy, axis=0) == idx)[0]
+                    ax.scatter(principalComponents[indices, row],
+                               principalComponents[indices, col],
+                               c=color,
+                               label='class '+str(idx))
+    fig.tight_layout()
+    if filename[-4:] != '.png':
+        filename += '.png'
+    fig.savefig(filename)
+
+
+def plot_cf(testy, predy, title, filename):
+    '''
+    Uses sklearn metric to compute a confusion matrix for visualization
+    Inputs:
+    testy: array/vector with ground-truth labels for test/evaluation set
+    predy: array/vector with predicted sample labels from trained model
+    title: string title for plot
+    filename: string with extension for confusion matrix file
+    '''
+
+    cf_matrix = confusion_matrix(testy, predy)
+    ax = sns.heatmap(cf_matrix, annot=True, cmap='Blues')
+
+    ax.set_title(title)
+    ax.set_xlabel('\nPredicted Values')
+    ax.set_ylabel('Actual Values ')
+
+    # Ticket labels - List must be in alphabetical order
+    ax.xaxis.set_ticklabels(['0(SNM)', '1(other)'])
+    ax.yaxis.set_ticklabels(['0(SNM)', '1(other)'])
+    # Save the visualization of the Confusion Matrix.
+    plt.savefig(filename)
diff --git a/tests/test_BackgroundEstimator.py b/tests/test_BackgroundEstimator.py
index 2d10c89..efc1299 100644
--- a/tests/test_BackgroundEstimator.py
+++ b/tests/test_BackgroundEstimator.py
@@ -77,7 +77,6 @@ def test_write():
     bckg.write(ofilename=ofilename)
 
     results = np.loadtxt(fname=ofilename+'.csv', delimiter=',')
-    print(results)
 
     # the resulting observation should be:
     #   counts * integration / live-time
diff --git a/tests/test_models.py b/tests/test_models.py
new file mode 100644
index 0000000..e3fb086
--- /dev/null
+++ b/tests/test_models.py
@@ -0,0 +1,491 @@
+# diagnostics
+import numpy as np
+from datetime import datetime, timedelta
+# testing models
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+import tests.test_data as test_data
+# hyperopt
+from hyperopt.pyll.base import scope
+from hyperopt import hp
+# testing utils
+import scripts.utils as utils
+# models
+from models.LogReg import LogReg
+from models.SSML.CoTraining import CoTraining
+from models.SSML.LabelProp import LabelProp
+from models.SSML.ShadowNN import ShadowNN
+from models.SSML.ShadowCNN import ShadowCNN
+# testing write
+import joblib
+import os
+
+# initialize sample data
+start_date = datetime(2019, 2, 2)
+delta = timedelta(seconds=1)
+timestamps = np.arange(start_date,
+                       start_date + (test_data.timesteps * delta),
+                       delta).astype('datetime64[s]').astype('float64')
+
+live = np.full((len(timestamps),), test_data.livetime)
+sample_val = 1.0
+spectra = np.full((len(timestamps), test_data.energy_bins),
+                  np.full((1, test_data.energy_bins), sample_val))
+# setting up for rejected null hypothesis
+rejected_H0_time = np.random.choice(spectra.shape[0],
+                                    test_data.timesteps//2,
+                                    replace=False)
+spectra[rejected_H0_time] = 100.0
+
+labels = np.full((spectra.shape[0],), 0)
+labels[rejected_H0_time] = 1
+
+
+def test_utils():
+    X, Ux, y, Uy = train_test_split(spectra,
+                                    labels,
+                                    test_size=0.5,
+                                    random_state=0)
+    Uy = np.full_like(Uy, -1)
+
+    # test cross validation for supervised data using LogReg
+    params = {'max_iter': 2022, 'tol': 0.5, 'C': 5.0}
+    model = LogReg(params=params)
+    max_acc_model = utils.cross_validation(model=model,
+                                           X=X,
+                                           y=y,
+                                           params=params)
+    assert max_acc_model['accuracy'] >= 0.5
+
+    # test cross validation for supervised data and StratifiedKFold with LogReg
+    params = {'max_iter': 2022, 'tol': 0.5, 'C': 5.0}
+    model = LogReg(params=params)
+    max_acc_model = utils.cross_validation(model=model,
+                                           X=X,
+                                           y=y,
+                                           params=params,
+                                           stratified=True)
+    assert max_acc_model['accuracy'] >= 0.5
+
+    # test cross validation for SSML with LabelProp
+    params = {'gamma': 10, 'n_neighbors': 15, 'max_iter': 2022, 'tol': 0.5}
+    model = LabelProp(params=params)
+    max_acc_model = utils.cross_validation(model=model,
+                                           X=np.append(X, Ux, axis=0),
+                                           y=np.append(y, Uy, axis=0),
+                                           params=params,
+                                           stratified=True)
+    assert max_acc_model['accuracy'] >= 0.5
+
+    # data split for data visualization
+    X_train, X_test, y_train, y_test = train_test_split(X,
+                                                        y,
+                                                        test_size=0.2,
+                                                        random_state=0)
+
+    filename = 'test_pca'
+    utils.pca(X_train, y_train, Ux, np.full_like(Uy, -1), filename)
+    os.remove(filename+'.png')
+
+    filename = 'test_multiD_pca'
+    utils.multiD_pca(X_train, y_train, Ux, np.full_like(Uy, -1), filename, n=5)
+    os.remove(filename+'.png')
+
+    # normalization
+    normalizer = StandardScaler()
+    normalizer.fit(X_train)
+
+    X_train = normalizer.transform(X_train)
+    X_test = normalizer.transform(X_test)
+
+    # default behavior
+    model = LogReg(params=None, random_state=0)
+    model.train(X_train, y_train)
+
+    # testing train and predict methods
+    pred, acc = model.predict(X_test, y_test)
+
+    filename = 'test_cf'
+    utils.plot_cf(y_test, pred, title=filename, filename=filename)
+    os.remove(filename+'.png')
+
+
+def test_LogReg():
+    # test saving model input parameters
+    params = {'max_iter': 2022, 'tol': 0.5, 'C': 5.0}
+    model = LogReg(params=params)
+
+    assert model.model.max_iter == params['max_iter']
+    assert model.model.tol == params['tol']
+    assert model.model.C == params['C']
+
+    X_train, X_test, y_train, y_test = train_test_split(spectra,
+                                                        labels,
+                                                        test_size=0.2,
+                                                        random_state=0)
+
+    # normalization
+    normalizer = StandardScaler()
+    normalizer.fit(X_train)
+
+    X_train = normalizer.transform(X_train)
+    X_test = normalizer.transform(X_test)
+
+    # default behavior
+    model = LogReg(params=None, random_state=0)
+    model.train(X_train, y_train)
+
+    # testing train and predict methods
+    pred, acc = model.predict(X_test, y_test)
+
+    assert acc > 0.7
+    np.testing.assert_equal(pred, y_test)
+
+    # testing hyperopt optimize methods
+    space = {'max_iter': scope.int(hp.quniform('max_iter',
+                                               10,
+                                               10000,
+                                               10)),
+             'tol': hp.loguniform('tol', 1e-5, 1e-1),
+             'C': hp.uniform('C', 0.001, 1000.0)
+             }
+    data_dict = {'trainx': X_train,
+                 'testx': X_test,
+                 'trainy': y_train,
+                 'testy': y_test
+                 }
+    model.optimize(space, data_dict, max_evals=2, verbose=True)
+
+    assert model.best['accuracy'] >= model.worst['accuracy']
+    assert model.best['status'] == 'ok'
+
+    # testing model write to file method
+    filename = 'test_LogReg'
+    ext = '.joblib'
+    model.save(filename)
+    model_file = joblib.load(filename+ext)
+    assert model_file.best['params'] == model.best['params']
+
+    os.remove(filename+ext)
+
+
+def test_CoTraining():
+    # test saving model input parameters
+    params = {'max_iter': 2022, 'tol': 0.5, 'C': 5.0}
+    model = CoTraining(params=params)
+
+    assert model.model1.max_iter == params['max_iter']
+    assert model.model1.tol == params['tol']
+    assert model.model1.C == params['C']
+
+    assert model.model2.max_iter == params['max_iter']
+    assert model.model2.tol == params['tol']
+    assert model.model2.C == params['C']
+
+    X, Ux, y, Uy = train_test_split(spectra,
+                                    labels,
+                                    test_size=0.5,
+                                    random_state=0)
+    X_train, X_test, y_train, y_test = train_test_split(X,
+                                                        y,
+                                                        test_size=0.2,
+                                                        random_state=0)
+
+    # normalization
+    normalizer = StandardScaler()
+    normalizer.fit(X_train)
+
+    X_train = normalizer.transform(X_train)
+    X_test = normalizer.transform(X_test)
+    Ux = normalizer.transform(Ux)
+
+    # default behavior
+    model = CoTraining(params=None, random_state=0)
+    model.train(X_train, y_train, Ux)
+
+    # testing train and predict methods
+    pred, acc, *_ = model.predict(X_test, y_test)
+
+    assert acc > 0.7
+    np.testing.assert_equal(pred, y_test)
+
+    # testing hyperopt optimize methods
+    space = {'max_iter': scope.int(hp.quniform('max_iter',
+                                               10,
+                                               10000,
+                                               10)),
+             'tol': hp.loguniform('tol', 1e-5, 1e-3),
+             'C': hp.uniform('C', 1.0, 1000.0),
+             'n_samples': scope.int(hp.quniform('n_samples',
+                                                1,
+                                                20,
+                                                1)),
+             'seed': 0
+             }
+    data_dict = {'trainx': X_train,
+                 'testx': X_test,
+                 'trainy': y_train,
+                 'testy': y_test,
+                 'Ux': Ux
+                 }
+    model.optimize(space, data_dict, max_evals=2, verbose=True)
+
+    assert model.best['accuracy'] >= model.worst['accuracy']
+    assert model.best['status'] == 'ok'
+
+    # testing model plotting method
+    filename = 'test_plot'
+    model.plot_cotraining(model1_accs=model.best['model1_acc_history'],
+                          model2_accs=model.best['model2_acc_history'],
+                          filename=filename)
+    os.remove(filename+'.png')
+
+    # testing model write to file method
+    filename = 'test_LogReg'
+    ext = '.joblib'
+    model.save(filename)
+    model_file = joblib.load(filename+ext)
+    assert model_file.best['params'] == model.best['params']
+
+    os.remove(filename+ext)
+
+
+def test_LabelProp():
+    # test saving model input parameters
+    params = {'gamma': 10, 'n_neighbors': 15, 'max_iter': 2022, 'tol': 0.5}
+    model = LabelProp(params=params)
+
+    assert model.model.gamma == params['gamma']
+    assert model.model.n_neighbors == params['n_neighbors']
+    assert model.model.max_iter == params['max_iter']
+    assert model.model.tol == params['tol']
+
+    # there should be no normalization on LabelProp data
+    # since it depends on the distances between samples
+    X, Ux, y, Uy = train_test_split(spectra,
+                                    labels,
+                                    test_size=0.5,
+                                    random_state=0)
+    X_train, X_test, y_train, y_test = train_test_split(X,
+                                                        y,
+                                                        test_size=0.2,
+                                                        random_state=0)
+
+    # default behavior
+    model = LabelProp(params=None, random_state=0)
+    model.train(X_train, y_train, Ux)
+
+    # testing train and predict methods
+    pred, acc = model.predict(X_test, y_test)
+
+    # the default n_neighbors(=7) from sklearn is too large
+    # for the size of this dataset
+    # therefore the accuracy is expected to be poor
+    # a better value for this dataset would be n_neighbors=2
+    # (tested when specifying params in LabelProp.__init__)
+    assert acc >= 0.5
+    # uninteresting test if LabelProp predicts all one class
+    # TODO: make the default params test meaningful
+    assert np.count_nonzero(pred == y_test) > 0
+
+    # testing hyperopt optimize methods
+    space = {'max_iter': scope.int(hp.quniform('max_iter',
+                                               10,
+                                               10000,
+                                               10)),
+             'tol': hp.loguniform('tol', 1e-6, 1e-4),
+             'gamma': hp.uniform('gamma', 1, 50),
+             'n_neighbors': scope.int(hp.quniform('n_neighbors',
+                                                  1,
+                                                  X_train.shape[0],
+                                                  1))
+             }
+    data_dict = {'trainx': X_train,
+                 'testx': X_test,
+                 'trainy': y_train,
+                 'testy': y_test,
+                 'Ux': Ux
+                 }
+    model.optimize(space, data_dict, max_evals=2, verbose=True)
+
+    assert model.best['accuracy'] >= model.worst['accuracy']
+    assert model.best['status'] == 'ok'
+
+    # testing model write to file method
+    filename = 'test_LogReg'
+    ext = '.joblib'
+    model.save(filename)
+    model_file = joblib.load(filename+ext)
+    assert model_file.best['params'] == model.best['params']
+
+    os.remove(filename+ext)
+
+
+def test_ShadowNN():
+    # check default parameter settings
+    model = ShadowNN()
+    assert model.params == {'binning': 1}
+    assert model.eaat is not None
+    assert model.eaat_opt is not None
+    assert model.xEnt is not None
+    assert model.input_length == 1000
+
+    X, Ux, y, Uy = train_test_split(spectra,
+                                    labels,
+                                    test_size=0.5,
+                                    random_state=0)
+    X_train, X_test, y_train, y_test = train_test_split(X,
+                                                        y,
+                                                        test_size=0.2,
+                                                        random_state=0)
+
+    # normalization
+    normalizer = StandardScaler()
+    normalizer.fit(X_train)
+
+    X_train = normalizer.transform(X_train)
+    X_test = normalizer.transform(X_test)
+    Ux = normalizer.transform(Ux)
+
+    params = {'hidden_layer': 10,
+              'alpha': 0.1,
+              'xi': 1e-3,
+              'eps': 1.0,
+              'lr': 0.1,
+              'momentum': 0.9,
+              'binning': 20}
+    # default behavior
+    model = ShadowNN(params=params, random_state=0)
+    acc_history = model.train(X_train, y_train, Ux, X_test, y_test)
+
+    # testing train and predict methods
+    pred, acc = model.predict(X_test, y_test)
+
+    # test for agreement between training and testing
+    # (since the same data is used for diagnostics in this test)
+    assert acc_history[-1] == acc
+
+    # Shadow/PyTorch reports accuracies as percentages
+    # rather than decimals
+    # uninteresting test if Shadow predicts all one class
+    # TODO: make the default params test meaningful
+    assert np.count_nonzero(pred == y_test) > 0
+
+    # testing hyperopt optimize methods
+    space = {'hidden_layer': 10,
+             'alpha': 0.1,
+             'xi': 1e-3,
+             'eps': 1.0,
+             'lr': 0.1,
+             'momentum': 0.9,
+             'binning': scope.int(hp.quniform('binning',
+                                              10,
+                                              20,
+                                              1))
+             }
+    data_dict = {'trainx': X_train,
+                 'testx': X_test,
+                 'trainy': y_train,
+                 'testy': y_test,
+                 'Ux': Ux
+                 }
+    model.optimize(space, data_dict, max_evals=2, verbose=True)
+
+    assert model.best['accuracy'] >= model.worst['accuracy']
+    assert model.best['status'] == 'ok'
+
+    # testing model write to file method
+    filename = 'test_LogReg'
+    ext = '.joblib'
+    model.save(filename)
+    model_file = joblib.load(filename+ext)
+    assert model_file.best['params'] == model.best['params']
+
+    os.remove(filename+ext)
+
+
+def test_ShadowCNN():
+    # check default parameter settings
+    model = ShadowCNN()
+    assert model.params == {'binning': 1, 'batch_size': 1}
+    assert model.model is not None
+    assert model.eaat is not None
+    assert model.optimizer is not None
+
+    X, Ux, y, Uy = train_test_split(spectra,
+                                    labels,
+                                    test_size=0.5,
+                                    random_state=0)
+    X_train, X_test, y_train, y_test = train_test_split(X,
+                                                        y,
+                                                        test_size=0.2,
+                                                        random_state=0)
+
+    # normalization
+    normalizer = StandardScaler()
+    normalizer.fit(X_train)
+
+    X_train = normalizer.transform(X_train)
+    X_test = normalizer.transform(X_test)
+    Ux = normalizer.transform(Ux)
+
+    params = {'layer1': 2,
+              'kernel': 3,
+              'alpha': 0.1,
+              'xi': 1e-3,
+              'eps': 1.0,
+              'lr': 0.1,
+              'momentum': 0.9,
+              'binning': 20,
+              'batch_size': 4,
+              'drop_rate': 0.1}
+
+    # default behavior
+    model = ShadowCNN(params=params, random_state=0)
+    losscurve, evalcurve = model.train(X_train, y_train, Ux, X_test, y_test)
+
+    # testing train and predict methods
+    pred, acc = model.predict(X_test, y_test)
+
+    # test for agreement between training and testing
+    # (since the same data is used for diagnostics in this test)
+    assert evalcurve[-1] == acc
+
+    # Shadow/PyTorch reports accuracies as percentages
+    # rather than decimals
+    # uninteresting test if Shadow predicts all one class
+    # TODO: make the default params test meaningful
+    assert np.count_nonzero(pred == y_test) > 0
+
+    # testing hyperopt optimize methods
+    space = params
+    space['binning'] = scope.int(hp.quniform('binning',
+                                 10,
+                                 20,
+                                 1))
+    data_dict = {'trainx': X_train,
+                 'testx': X_test,
+                 'trainy': y_train,
+                 'testy': y_test,
+                 'Ux': Ux
+                 }
+    model.optimize(space, data_dict, max_evals=2, verbose=True)
+
+    assert model.best['accuracy'] >= model.worst['accuracy']
+    assert model.best['status'] == 'ok'
+
+    # testing model plotting method
+    filename = 'test_plot'
+    model.plot_training(losscurve=model.best['losscurve'],
+                        evalcurve=model.best['evalcurve'],
+                        filename=filename)
+    os.remove(filename+'.png')
+
+    # testing model write to file method
+    filename = 'test_LogReg'
+    ext = '.joblib'
+    model.save(filename)
+    model_file = joblib.load(filename+ext)
+    assert model_file.best['params'] == model.best['params']
+
+    os.remove(filename+ext)