diff --git a/example_defenses/spectogram_preprocessing.py b/example_defenses/spectogram_preprocessing.py
new file mode 100644
index 0000000..fa8e470
--- /dev/null
+++ b/example_defenses/spectogram_preprocessing.py
@@ -0,0 +1,104 @@
+import numpy as np
+from scipy import signal
+from typing import Optional, Tuple
+
+from art.defences.preprocessor import Preprocessor
+
+
+class Spectogram(Preprocessor):
+    """Turn raw audio into fixed-width, normalized spectrogram segments.
+
+    Each clip is rescaled by QUANTIZATION and cast to int64, converted with
+    scipy.signal.spectrogram, normalized with the fixed constants below, and
+    cut into consecutive pieces of N_TIME_BINS time bins.
+    """
+
+    SAMPLERATE_HZ = 16000  # sample rate for wav files
+    WINDOW_TIME_SECS = 0.030  # 30 ms windows for frequency transforms
+    OVERLAP_TIME_SECS = 0.025  # 25 ms of overlap
+    QUANTIZATION = 2 ** 15  # scale factor that reverses canonical preprocessing
+    N_TIME_BINS = 100  # number of time bins in spectrogram input to model
+
+    # -- NORMALIZATION PARAMETERS --#
+    ZSCORE_MEAN = -0.7731548539849517
+    ZSCORE_STD = 3.5610712683198624
+    SCALE_MAX = 15.441861
+    SCALE_MIN = -4.6051702
+
+    def __init__(self) -> None:
+        # Run the base initializer too; presumably the ART base class sets up
+        # state of its own there (assumption - confirm against installed ART).
+        super().__init__()
+        # Window and overlap never change, so build them once, not per call.
+        window_num_samples = int(self.WINDOW_TIME_SECS * self.SAMPLERATE_HZ)
+        self._window = signal.get_window(("tukey", 0.25), window_num_samples)
+        self._num_samples_overlap = int(self.OVERLAP_TIME_SECS * self.SAMPLERATE_HZ)
+
+    def __call__(
+        self, x: np.ndarray, y: Optional[np.ndarray] = None
+    ) -> Tuple[np.ndarray, Optional[np.ndarray]]:
+        """Return (segments, y): spectrogram segments of `x` plus `y` untouched.
+
+        A float32 `x` is treated as one item; anything else is iterated over,
+        one clip per element. `y` is optional and is returned exactly as passed
+        in (it is not repeated per segment).
+        """
+        if x.dtype == np.float32:
+            x = [x]
+
+        spectrograms = []
+        for aud in x:
+            # Reverse canonical preprocessing
+            samples = np.squeeze((aud * self.QUANTIZATION).astype(np.int64))
+            spectrograms.append(self._spectrogram_241(samples))
+        return self._segment(spectrograms), y
+
+    def _spectrogram_241(self, samples: np.ndarray) -> np.ndarray:
+        """Return the normalized 2d spectrogram [f x t] for input audio samples.
+
+        Normalization steps, in order:
+        1. s_ = np.log(s + 0.01)
+        2. s_ = (s_ - ZSCORE_MEAN) / ZSCORE_STD
+        3. s_ = (s_ - SCALE_MIN) / (SCALE_MAX - SCALE_MIN)
+        """
+        _, _, s = signal.spectrogram(
+            samples,
+            self.SAMPLERATE_HZ,
+            window=self._window,
+            noverlap=self._num_samples_overlap,
+        )
+        s_ = np.log(s + 0.01)
+        s_ = (s_ - self.ZSCORE_MEAN) / self.ZSCORE_STD
+        s_ = (s_ - self.SCALE_MIN) / (self.SCALE_MAX - self.SCALE_MIN)
+        return s_
+
+    def _segment(self, spectrograms) -> np.ndarray:
+        """Cut spectrograms into fixed-width segments and stack them.
+
+        Each spectrogram is [f x T] where T depends on the clip duration. The
+        model accepts a fixed size spectrogram, so every one is split into
+        T // N_TIME_BINS consecutive pieces of N_TIME_BINS time bins; leftover
+        trailing bins are dropped. A trailing axis of size 1 is appended.
+        """
+        width = self.N_TIME_BINS
+        pieces = [
+            spec[:, start : start + width]
+            for spec in spectrograms
+            for start in range(0, spec.shape[1] - width + 1, width)
+        ]
+        return np.expand_dims(np.array(pieces), -1)
+
+    def estimate_gradient(self, x: np.ndarray, grad: np.ndarray) -> np.ndarray:
+        """Return `grad` unchanged; no gradient of the transform is computed."""
+        return grad
+
+    def apply_fit(self) -> bool:
+        # NOTE(review): ART may expect a property here - confirm.
+        return True
+
+    def apply_predict(self) -> bool:
+        # NOTE(review): ART may expect a property here - confirm.
+        return True
+
+    def fit(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> None:
+        """No parameters to learn for this method; do nothing."""
diff --git a/example_models/keras/librispeech_spectrogram.py b/example_models/keras/librispeech_spectrogram.py
index afbcbd1..c33d2cb 100644
--- a/example_models/keras/librispeech_spectrogram.py
+++ b/example_models/keras/librispeech_spectrogram.py
@@ -3,8 +3,6 @@
 
 Model contributed by: MITRE Corporation
 """
-import numpy as np
-from scipy import signal
 
 import tensorflow as tf
 import tensorflow.keras as keras
@@ -15,61 +13,6 @@
 from armory.data.utils import maybe_download_weights_from_s3
 
 
-def preprocessing_fn(audios):
-    # -- SPECTROGRAM PARAMETERS --#
-    samplerate_hz = 16000  # sample rate for wav files
-    window_time_secs = 0.030  # 30 ms windows for frequency transforms
-    num_samples_overlap = int(0.025 * samplerate_hz)  # 25 ms of overlap
-
-    # -- NORMALIZATION PARAMETERS --#
-    zscore_mean = -0.7731548539849517
-    zscore_std = 3.5610712683198624
-    scale_max = 15.441861
-    scale_min = -4.6051702
-
-    # Construct window
-    window_num_samples = int(window_time_secs * samplerate_hz)
-    window = signal.get_window(("tukey", 0.25), window_num_samples)
-
-    def normalize_spectrogram(s):
-        """ Normalize spectrogram s:
-        1. s_ = np.log(s + 0.01)
-        2. s_ = zscores(s_)
-        3. s_ = minmax_scale(s_)
-
-        Return normalized spectrogram s_ in range [-1, 1] with mean ~ 0 and std ~ 1
-        """
-        s_ = np.log(s + 0.01)
-        s_ = (s_ - zscore_mean) / zscore_std
-        s_ = (s_ - scale_min) / (scale_max - scale_min)
-        return s_
-
-    def spectrogram_241(samples):
-        """ Return vector of frequences (f), vector of times (t), and 2d matrix spectrogram (s)
-        for input audio samples.
-        """
-        # Construct spectrogram (f = frequencies array, t = times array, s = 2d spectrogram [f x t])
-        f, t, s = signal.spectrogram(
-            samples, samplerate_hz, window=window, noverlap=num_samples_overlap
-        )
-
-        # Normalize spectrogram
-        s = normalize_spectrogram(s)
-
-        return f, t, s
-
-    if audios.dtype == np.int64:
-        audios = [audios]
-
-    outputs = []
-    for aud in audios:
-        aud = np.squeeze(aud)
-        _, _, s = spectrogram_241(aud)
-        outputs.append(s)
-
-    return outputs
-
-
 def make_model(**kwargs) -> tf.keras.Model:
     model = Sequential()
     model.add(
@@ -96,7 +39,6 @@ def make_model(**kwargs) -> tf.keras.Model:
         optimizer=keras.optimizers.Adam(lr=0.0002),
         metrics=["accuracy"],
     )
-
     return model
 
 
diff --git a/example_scenario_configs/librispeech_spectrogram_classification.json b/example_scenario_configs/librispeech_spectrogram_classification.json
index d896ea4..dc0d881 100644
--- a/example_scenario_configs/librispeech_spectrogram_classification.json
+++ b/example_scenario_configs/librispeech_spectrogram_classification.json
@@ -4,22 +4,24 @@
     "attack": {
         "knowledge": "white",
         "kwargs": {
-            "batch_size": 32,
-            "eps": 0.2,
-            "eps_step": 0.1,
-            "minimal": false,
-            "num_random_init": 0,
-            "targeted": false
+            "max_eval": 200,
+            "max_iter": 100,
+            "norm": "inf"
         },
         "module": "art.attacks.evasion",
-        "name": "FastGradientMethod"
+        "name": "HopSkipJump"
     },
     "dataset": {
-        "batch_size": 64,
+        "batch_size": 1,
         "module": "armory.data.datasets",
         "name": "librispeech_dev_clean"
     },
-    "defense": null,
+    "defense": {
+        "kwargs": {},
+        "module": "example_defenses.spectogram_preprocessing",
+        "name": "Spectogram",
+        "type": "Preprocessor"
+    },
     "metric": null,
     "model": {
         "fit": false,
@@ -41,6 +43,6 @@
         "docker_image": "twosixarmory/tf1:0.12.1",
         "external_github_repo": null,
         "gpus": "all",
-        "use_gpu": true
+        "use_gpu": false
     }
 }
diff --git a/example_scenarios/audio_spectrogram_classification.py b/example_scenarios/audio_spectrogram_classification.py
index 223a9b2..276c5af 100644
--- a/example_scenarios/audio_spectrogram_classification.py
+++ b/example_scenarios/audio_spectrogram_classification.py
@@ -18,6 +18,7 @@
     load_dataset,
     load_model,
     load_attack,
+    load_defense_internal,
 )
 from armory.utils import metrics
 from armory.scenarios.base import Scenario
@@ -25,31 +26,6 @@
 logger = logging.getLogger(__name__)
 
 
-def segment(x, y, n_time_bins):
-    """
-    Return segmented batch of spectrograms and labels
-
-    x is of shape (N,241,T), representing N spectrograms, each with 241 frequency bins
-    and T time bins that's variable, depending on the duration of the corresponding
-    raw audio.
-
-    The model accepts a fixed size spectrogram, so data needs to be segmented for a
-    fixed number of time_bins.
-    """
-
-    x_seg, y_seg = [], []
-    for xt, yt in zip(x, y):
-        n_seg = int(xt.shape[1] / n_time_bins)
-        xt = xt[:, : n_seg * n_time_bins]
-        for ii in range(n_seg):
-            x_seg.append(xt[:, ii * n_time_bins : (ii + 1) * n_time_bins])
-            y_seg.append(yt)
-    x_seg = np.array(x_seg)
-    x_seg = np.expand_dims(x_seg, -1)
-    y_seg = np.array(y_seg)
-    return x_seg, y_seg
-
-
 class AudioSpectrogramClassificationTask(Scenario):
     def _evaluate(
         self, config: dict, num_eval_batches: Optional[int], skip_benign: Optional[bool]
@@ -58,31 +34,36 @@ def _evaluate(
         Evaluate a config file for classification robustness against attack.
         """
         model_config = config["model"]
-        classifier, preprocessing_fn = load_model(model_config)
+        classifier, _ = load_model(model_config)
 
-        n_tbins = 100  # number of time bins in spectrogram input to model
+        defense_config = config.get("defense") or {}
+        defense_type = defense_config.get("type")
+
+        if defense_type in ["Preprocessor", "Postprocessor"]:
+            logger.info(f"Applying internal {defense_type} defense to classifier")
+            classifier = load_defense_internal(config["defense"], classifier)
 
         task_metric = metrics.categorical_accuracy
 
+        if config["dataset"]["batch_size"] != 1:
+            raise NotImplementedError("Currently only supports batch size of 1")
+
         # Train ART classifier
         if not model_config["weights_file"]:
+            raise NotImplementedError("Gradients not available for training.")
             classifier.set_learning_phase(True)
             logger.info(
                 f"Fitting model {model_config['module']}.{model_config['name']}..."
             )
             fit_kwargs = model_config["fit_kwargs"]
             train_data_generator = load_dataset(
-                config["dataset"],
-                epochs=fit_kwargs["nb_epochs"],
-                split_type="train",
-                preprocessing_fn=preprocessing_fn,
+                config["dataset"], epochs=fit_kwargs["nb_epochs"], split_type="train",
             )
 
             for cnt, (x, y) in tqdm(enumerate(train_data_generator)):
-                x_seg, y_seg = segment(x, y, n_tbins)
                 classifier.fit(
-                    x_seg,
-                    y_seg,
+                    x,
+                    y,
                     batch_size=config["dataset"]["batch_size"],
                     nb_epochs=1,
                     verbose=True,
@@ -91,71 +72,70 @@ def _evaluate(
                 if (cnt + 1) % train_data_generator.batches_per_epoch == 0:
                     # evaluate on validation examples
                     val_data_generator = load_dataset(
-                        config["dataset"],
-                        epochs=1,
-                        split_type="validation",
-                        preprocessing_fn=preprocessing_fn,
+                        config["dataset"], epochs=1, split_type="validation",
                     )
 
                     cnt = 0
                     validation_accuracies = []
                     for x_val, y_val in tqdm(val_data_generator):
-                        x_val_seg, y_val_seg = segment(x_val, y_val, n_tbins)
-                        y_pred = classifier.predict(x_val_seg)
-                        validation_accuracies.extend(task_metric(y_val_seg, y_pred))
-                        cnt += len(y_val_seg)
+                        y_pred = np.mean(
+                            classifier.predict(x_val, batch_size=1),
+                            axis=0,
+                            keepdims=True,
+                        )
+                        validation_accuracies.extend(task_metric(y_val, y_pred))
+                        cnt += len(y_val)
                     validation_accuracy = sum(validation_accuracies) / cnt
                     logger.info("Validation accuracy: {}".format(validation_accuracy))
 
         classifier.set_learning_phase(False)
         # Evaluate ART classifier on test examples
-        logger.info(f"Loading testing dataset {config['dataset']['name']}...")
-        test_data_generator = load_dataset(
-            config["dataset"],
-            epochs=1,
-            split_type="test",
-            preprocessing_fn=preprocessing_fn,
-        )
-
-        logger.info("Running inference on benign test examples...")
+        if skip_benign:
+            logger.info("Skipping benign classification...")
+        else:
+            logger.info(f"Loading testing dataset {config['dataset']['name']}...")
+            test_data_generator = load_dataset(
+                config["dataset"],
+                epochs=1,
+                split_type="test",
+                num_batches=num_eval_batches,
+            )
+            logger.info("Running inference on benign test examples...")
 
-        cnt = 0
-        benign_accuracies = []
-        for x, y in tqdm(test_data_generator, desc="Benign"):
-            x_seg, y_seg = segment(x, y, n_tbins)
-            y_pred = classifier.predict(x_seg)
-            benign_accuracies.extend(task_metric(y_seg, y_pred))
-            cnt += len(y_seg)
+            cnt = 0
+            benign_accuracies = []
+            for x, y in tqdm(test_data_generator, desc="Benign"):
+                y_pred = np.mean(
+                    classifier.predict(x, batch_size=1), axis=0, keepdims=True
+                )
+                benign_accuracies.extend(task_metric(y, y_pred))
+                cnt += len(y)
 
-        benign_accuracy = sum(benign_accuracies) / cnt
-        logger.info(f"Accuracy on benign test examples: {benign_accuracy:.2%}")
+            benign_accuracy = sum(benign_accuracies) / cnt
+            logger.info(f"Accuracy on benign test examples: {benign_accuracy:.2%}")
 
         # Evaluate the ART classifier on adversarial test examples
         logger.info("Generating / testing adversarial examples...")
         attack = load_attack(config["attack"], classifier)
 
         test_data_generator = load_dataset(
-            config["dataset"],
-            epochs=1,
-            split_type="test",
-            preprocessing_fn=preprocessing_fn,
+            config["dataset"], epochs=1, split_type="test", num_batches=num_eval_batches
         )
 
         cnt = 0
         adversarial_accuracies = []
         for x, y in tqdm(test_data_generator, desc="Attack"):
-            x_seg, y_seg = segment(x, y, n_tbins)
-            x_adv = attack.generate(x=x_seg)
-            y_pred = classifier.predict(x_adv)
-            adversarial_accuracies.extend(task_metric(y_seg, y_pred))
-            cnt += len(y_seg)
+            x_adv = attack.generate(x=x)
+            y_pred = np.mean(
+                classifier.predict(x_adv, batch_size=1), axis=0, keepdims=True
+            )
+            adversarial_accuracies.extend(task_metric(y, y_pred))
+            cnt += len(y)
         adversarial_accuracy = sum(adversarial_accuracies) / cnt
         logger.info(
             f"Accuracy on adversarial test examples: {adversarial_accuracy:.2%}"
         )
-
-        results = {
-            "mean_benign_accuracy": benign_accuracy,
-            "mean_adversarial_accuracy": adversarial_accuracy,
-        }
+        results = {"mean_adversarial_accuracy": adversarial_accuracy}
+        if not skip_benign:
+            results["mean_benign_accuracy"] = benign_accuracy
         return results