diff --git a/example_defenses/spectogram_preprocessing.py b/example_defenses/spectogram_preprocessing.py
new file mode 100644
index 0000000..fa8e470
--- /dev/null
+++ b/example_defenses/spectogram_preprocessing.py
@@ -0,0 +1,134 @@
+import numpy as np
+from scipy import signal
+from typing import Optional, Tuple
+
+from art.defences.preprocessor import Preprocessor
+
+
+class Spectogram(Preprocessor):
+    """
+    Preprocessing defense that turns raw audio into normalized spectrogram segments.
+
+    Each waveform is rescaled to integer samples, converted to a spectrogram with
+    `scipy.signal.spectrogram`, normalized with fixed constants, and cut into
+    fixed-width segments of `N_TIME_BINS` time bins.
+    """
+
+    # -- SPECTROGRAM PARAMETERS --#
+    SAMPLERATE_HZ = 16000  # sample rate for wav files
+    WINDOW_TIME_SECS = 0.030  # 30 ms windows for frequency transforms
+    OVERLAP_TIME_SECS = 0.025  # 25 ms of overlap
+
+    # -- NORMALIZATION PARAMETERS --#
+    ZSCORE_MEAN = -0.7731548539849517
+    ZSCORE_STD = 3.5610712683198624
+    SCALE_MAX = 15.441861
+    SCALE_MIN = -4.6051702
+
+    QUANTIZATION = 2 ** 15  # factor used to reverse canonical preprocessing
+    N_TIME_BINS = 100  # number of time bins in spectrogram input to model
+
+    def __init__(self) -> None:
+        # Let the ART base class set up its own state before adding ours.
+        super().__init__()
+        # Build the window once here rather than on every call.
+        window_num_samples = int(self.WINDOW_TIME_SECS * self.SAMPLERATE_HZ)
+        self._window = signal.get_window(("tukey", 0.25), window_num_samples)
+        self._num_samples_overlap = int(self.OVERLAP_TIME_SECS * self.SAMPLERATE_HZ)
+
+    def __call__(
+        self, x: np.ndarray, y: Optional[np.ndarray] = None
+    ) -> Tuple[np.ndarray, Optional[np.ndarray]]:
+        """
+        Convert audio waveforms into fixed-size, normalized spectrogram segments.
+
+        :param x: A single waveform (singleton dimensions allowed, e.g. (1, T)),
+            or a batch / object array with one waveform per entry.
+        :param y: Labels; returned unchanged.
+        :return: Tuple of the stacked segments (trailing channel axis added) and `y`.
+        """
+        if x.dtype != object and np.squeeze(x).ndim <= 1:
+            # A single waveform: wrap it so the loop below sees exactly one item.
+            x = [x]
+
+        outputs = []
+        for aud in x:
+            aud = np.squeeze(
+                (aud * self.QUANTIZATION).astype(np.int64)
+            )  # Reverse canonical preprocessing
+            outputs.append(self._spectrogram(aud))
+        return self._segment(outputs, y, self.N_TIME_BINS)
+
+    def _normalize_spectrogram(self, s: np.ndarray) -> np.ndarray:
+        """
+        Normalize spectrogram s:
+            1. s_ = np.log(s + 0.01)
+            2. s_ = (s_ - ZSCORE_MEAN) / ZSCORE_STD
+            3. s_ = (s_ - SCALE_MIN) / (SCALE_MAX - SCALE_MIN)
+
+        The constants are fixed rather than computed from `s`, so the output range
+        depends on the input.
+        """
+        s_ = np.log(s + 0.01)
+        s_ = (s_ - self.ZSCORE_MEAN) / self.ZSCORE_STD
+        s_ = (s_ - self.SCALE_MIN) / (self.SCALE_MAX - self.SCALE_MIN)
+        return s_
+
+    def _spectrogram(self, samples: np.ndarray) -> np.ndarray:
+        """
+        Return the normalized 2d spectrogram [f x t] of the input audio samples.
+        """
+        _, _, s = signal.spectrogram(
+            samples,
+            self.SAMPLERATE_HZ,
+            window=self._window,
+            noverlap=self._num_samples_overlap,
+        )
+        return self._normalize_spectrogram(s)
+
+    @staticmethod
+    def _segment(x, y, n_time_bins):
+        """
+        Return segmented batch of spectrograms and labels
+
+        x is a sequence of N spectrograms of shape (241, T): 241 frequency bins and a
+        number of time bins T that varies with the duration of the raw audio.
+
+        The model accepts a fixed size spectrogram, so each one is cut into
+        consecutive segments of n_time_bins; trailing bins that do not fill a whole
+        segment are dropped. Labels y are returned as given.
+        """
+        x_seg = []
+        for xt in x:
+            n_seg = xt.shape[1] // n_time_bins
+            for ii in range(n_seg):
+                x_seg.append(xt[:, ii * n_time_bins : (ii + 1) * n_time_bins])
+        x_seg = np.array(x_seg)
+        x_seg = np.expand_dims(x_seg, -1)
+        return x_seg, y
+
+    def estimate_gradient(self, x: np.ndarray, grad: np.ndarray) -> np.ndarray:
+        """
+        Return `grad` unchanged.
+
+        NOTE(review): the output of `__call__` has a different shape than its input,
+        so this pass-through is presumably only a placeholder - confirm before using
+        this defense with gradient-based attacks.
+        """
+        return grad
+
+    @property
+    def apply_fit(self) -> bool:
+        """Apply this preprocessing at training time."""
+        return True
+
+    @property
+    def apply_predict(self) -> bool:
+        """Apply this preprocessing at prediction time."""
+        return True
+
+    def fit(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> None:
+        """
+        No parameters to learn for this method; do nothing.
+        """
+        pass
diff --git a/example_models/keras/librispeech_spectrogram.py b/example_models/keras/librispeech_spectrogram.py index afbcbd1..c33d2cb 100644 --- a/example_models/keras/librispeech_spectrogram.py +++ b/example_models/keras/librispeech_spectrogram.py @@ -3,8 +3,6 @@ Model contributed by: MITRE Corporation """ -import numpy as np -from scipy import signal import tensorflow as tf import tensorflow.keras as keras @@ -15,61 +13,6 @@ from armory.data.utils import maybe_download_weights_from_s3 -def preprocessing_fn(audios): - # -- SPECTROGRAM PARAMETERS --# - samplerate_hz = 16000 # sample rate for wav files - window_time_secs = 0.030 # 30 ms windows for frequency transforms - num_samples_overlap = int(0.025 * samplerate_hz) # 25 ms of overlap - - # -- NORMALIZATION PARAMETERS --# - zscore_mean = -0.7731548539849517 - zscore_std = 3.5610712683198624 - scale_max = 15.441861 - scale_min = -4.6051702 - - # Construct window - window_num_samples = int(window_time_secs * samplerate_hz) - window = signal.get_window(("tukey", 0.25), window_num_samples) - - def normalize_spectrogram(s): - """ Normalize spectrogram s: - 1. s_ = np.log(s + 0.01) - 2. s_ = zscores(s_) - 3. s_ = minmax_scale(s_) - - Return normalized spectrogram s_ in range [-1, 1] with mean ~ 0 and std ~ 1 - """ - s_ = np.log(s + 0.01) - s_ = (s_ - zscore_mean) / zscore_std - s_ = (s_ - scale_min) / (scale_max - scale_min) - return s_ - - def spectrogram_241(samples): - """ Return vector of frequences (f), vector of times (t), and 2d matrix spectrogram (s) - for input audio samples. 
- """ - # Construct spectrogram (f = frequencies array, t = times array, s = 2d spectrogram [f x t]) - f, t, s = signal.spectrogram( - samples, samplerate_hz, window=window, noverlap=num_samples_overlap - ) - - # Normalize spectrogram - s = normalize_spectrogram(s) - - return f, t, s - - if audios.dtype == np.int64: - audios = [audios] - - outputs = [] - for aud in audios: - aud = np.squeeze(aud) - _, _, s = spectrogram_241(aud) - outputs.append(s) - - return outputs - - def make_model(**kwargs) -> tf.keras.Model: model = Sequential() model.add( @@ -96,7 +39,6 @@ def make_model(**kwargs) -> tf.keras.Model: optimizer=keras.optimizers.Adam(lr=0.0002), metrics=["accuracy"], ) - return model diff --git a/example_scenario_configs/librispeech_spectrogram_classification.json b/example_scenario_configs/librispeech_spectrogram_classification.json index d896ea4..dc0d881 100644 --- a/example_scenario_configs/librispeech_spectrogram_classification.json +++ b/example_scenario_configs/librispeech_spectrogram_classification.json @@ -4,22 +4,24 @@ "attack": { "knowledge": "white", "kwargs": { - "batch_size": 32, - "eps": 0.2, - "eps_step": 0.1, - "minimal": false, - "num_random_init": 0, - "targeted": false + "max_eval": 200, + "max_iter": 100, + "norm": "inf" }, "module": "art.attacks.evasion", - "name": "FastGradientMethod" + "name": "HopSkipJump" }, "dataset": { - "batch_size": 64, + "batch_size": 1, "module": "armory.data.datasets", "name": "librispeech_dev_clean" }, - "defense": null, + "defense": { + "kwargs": {}, + "module": "example_defenses.spectogram_preprocessing", + "name": "Spectogram", + "type": "Preprocessor" + }, "metric": null, "model": { "fit": false, @@ -41,6 +43,6 @@ "docker_image": "twosixarmory/tf1:0.12.1", "external_github_repo": null, "gpus": "all", - "use_gpu": true + "use_gpu": false } } diff --git a/example_scenarios/audio_spectrogram_classification.py b/example_scenarios/audio_spectrogram_classification.py index 223a9b2..276c5af 100644 --- 
a/example_scenarios/audio_spectrogram_classification.py +++ b/example_scenarios/audio_spectrogram_classification.py @@ -18,6 +18,7 @@ load_dataset, load_model, load_attack, + load_defense_internal, ) from armory.utils import metrics from armory.scenarios.base import Scenario @@ -25,31 +26,6 @@ logger = logging.getLogger(__name__) -def segment(x, y, n_time_bins): - """ - Return segmented batch of spectrograms and labels - - x is of shape (N,241,T), representing N spectrograms, each with 241 frequency bins - and T time bins that's variable, depending on the duration of the corresponding - raw audio. - - The model accepts a fixed size spectrogram, so data needs to be segmented for a - fixed number of time_bins. - """ - - x_seg, y_seg = [], [] - for xt, yt in zip(x, y): - n_seg = int(xt.shape[1] / n_time_bins) - xt = xt[:, : n_seg * n_time_bins] - for ii in range(n_seg): - x_seg.append(xt[:, ii * n_time_bins : (ii + 1) * n_time_bins]) - y_seg.append(yt) - x_seg = np.array(x_seg) - x_seg = np.expand_dims(x_seg, -1) - y_seg = np.array(y_seg) - return x_seg, y_seg - - class AudioSpectrogramClassificationTask(Scenario): def _evaluate( self, config: dict, num_eval_batches: Optional[int], skip_benign: Optional[bool] @@ -58,31 +34,36 @@ def _evaluate( Evaluate a config file for classification robustness against attack. 
""" model_config = config["model"] - classifier, preprocessing_fn = load_model(model_config) + classifier, _ = load_model(model_config) - n_tbins = 100 # number of time bins in spectrogram input to model + defense_config = config.get("defense") or {} + defense_type = defense_config.get("type") + + if defense_type in ["Preprocessor", "Postprocessor"]: + logger.info(f"Applying internal {defense_type} defense to classifier") + classifier = load_defense_internal(config["defense"], classifier) task_metric = metrics.categorical_accuracy + if config["dataset"]["batch_size"] != 1: + raise NotImplementedError("Currently only supports batch size of 1") + # Train ART classifier if not model_config["weights_file"]: + raise NotImplementedError("Gradients not available for training.") classifier.set_learning_phase(True) logger.info( f"Fitting model {model_config['module']}.{model_config['name']}..." ) fit_kwargs = model_config["fit_kwargs"] train_data_generator = load_dataset( - config["dataset"], - epochs=fit_kwargs["nb_epochs"], - split_type="train", - preprocessing_fn=preprocessing_fn, + config["dataset"], epochs=fit_kwargs["nb_epochs"], split_type="train", ) for cnt, (x, y) in tqdm(enumerate(train_data_generator)): - x_seg, y_seg = segment(x, y, n_tbins) classifier.fit( - x_seg, - y_seg, + x, + y, batch_size=config["dataset"]["batch_size"], nb_epochs=1, verbose=True, @@ -91,71 +72,70 @@ def _evaluate( if (cnt + 1) % train_data_generator.batches_per_epoch == 0: # evaluate on validation examples val_data_generator = load_dataset( - config["dataset"], - epochs=1, - split_type="validation", - preprocessing_fn=preprocessing_fn, + config["dataset"], epochs=1, split_type="validation", ) cnt = 0 validation_accuracies = [] for x_val, y_val in tqdm(val_data_generator): - x_val_seg, y_val_seg = segment(x_val, y_val, n_tbins) - y_pred = classifier.predict(x_val_seg) - validation_accuracies.extend(task_metric(y_val_seg, y_pred)) - cnt += len(y_val_seg) + y_pred = np.mean( + 
classifier.predict(x_val, batch_size=1), + axis=0, + keepdims=True, + ) + validation_accuracies.extend(task_metric(y_val, y_pred)) + cnt += len(y_val) validation_accuracy = sum(validation_accuracies) / cnt logger.info("Validation accuracy: {}".format(validation_accuracy)) classifier.set_learning_phase(False) # Evaluate ART classifier on test examples - logger.info(f"Loading testing dataset {config['dataset']['name']}...") - test_data_generator = load_dataset( - config["dataset"], - epochs=1, - split_type="test", - preprocessing_fn=preprocessing_fn, - ) - - logger.info("Running inference on benign test examples...") + if skip_benign: + logger.info("Skipping benign classification...") + else: + logger.info(f"Loading testing dataset {config['dataset']['name']}...") + test_data_generator = load_dataset( + config["dataset"], + epochs=1, + split_type="test", + num_batches=num_eval_batches, + ) + logger.info("Running inference on benign test examples...") - cnt = 0 - benign_accuracies = [] - for x, y in tqdm(test_data_generator, desc="Benign"): - x_seg, y_seg = segment(x, y, n_tbins) - y_pred = classifier.predict(x_seg) - benign_accuracies.extend(task_metric(y_seg, y_pred)) - cnt += len(y_seg) + cnt = 0 + benign_accuracies = [] + for x, y in tqdm(test_data_generator, desc="Benign"): + y_pred = np.mean( + classifier.predict(x, batch_size=1), axis=0, keepdims=True + ) + benign_accuracies.extend(task_metric(y, y_pred)) + cnt += len(y) - benign_accuracy = sum(benign_accuracies) / cnt - logger.info(f"Accuracy on benign test examples: {benign_accuracy:.2%}") + benign_accuracy = sum(benign_accuracies) / cnt + logger.info(f"Accuracy on benign test examples: {benign_accuracy:.2%}") # Evaluate the ART classifier on adversarial test examples logger.info("Generating / testing adversarial examples...") attack = load_attack(config["attack"], classifier) test_data_generator = load_dataset( - config["dataset"], - epochs=1, - split_type="test", - preprocessing_fn=preprocessing_fn, + 
config["dataset"], epochs=1, split_type="test", num_batches=num_eval_batches ) cnt = 0 adversarial_accuracies = [] for x, y in tqdm(test_data_generator, desc="Attack"): - x_seg, y_seg = segment(x, y, n_tbins) - x_adv = attack.generate(x=x_seg) - y_pred = classifier.predict(x_adv) - adversarial_accuracies.extend(task_metric(y_seg, y_pred)) - cnt += len(y_seg) + x_adv = attack.generate(x=x) + y_pred = np.mean( + classifier.predict(x_adv, batch_size=1), axis=0, keepdims=True + ) + adversarial_accuracies.extend(task_metric(y, y_pred)) + cnt += len(y) adversarial_accuracy = sum(adversarial_accuracies) / cnt logger.info( f"Accuracy on adversarial test examples: {adversarial_accuracy:.2%}" ) - - results = { - "mean_benign_accuracy": benign_accuracy, - "mean_adversarial_accuracy": adversarial_accuracy, - } + results = {"mean_adversarial_accuracy": adversarial_accuracy} + if not skip_benign: + results["mean_benign_accuracy"] = benign_accuracy return results