From 25412eb80f557f22980303fce20f070089c0d38e Mon Sep 17 00:00:00 2001 From: Asim Waheed Date: Mon, 22 Jul 2024 15:18:02 -0400 Subject: [PATCH 1/3] Refactor distribution inference into master class --- .../attacks/__init__.py | 4 +- ...ce.py => distribution_inference_attack.py} | 319 ++++-------------- .../attacks/suri_satml_2023.py | 225 ++++++++++++ .../run_distribution_inference.py | 4 +- 4 files changed, 293 insertions(+), 259 deletions(-) rename amulet/distribution_inference/attacks/{distribution_inference.py => distribution_inference_attack.py} (53%) create mode 100644 amulet/distribution_inference/attacks/suri_satml_2023.py diff --git a/amulet/distribution_inference/attacks/__init__.py b/amulet/distribution_inference/attacks/__init__.py index ff16c3d..a705345 100644 --- a/amulet/distribution_inference/attacks/__init__.py +++ b/amulet/distribution_inference/attacks/__init__.py @@ -1,3 +1,3 @@ -from .distribution_inference import DistributionInference +from .suri_satml_2023 import SuriSATML2023 -__all__ = ["DistributionInference"] +__all__ = ["SuriSATML2023"] diff --git a/amulet/distribution_inference/attacks/distribution_inference.py b/amulet/distribution_inference/attacks/distribution_inference_attack.py similarity index 53% rename from amulet/distribution_inference/attacks/distribution_inference.py rename to amulet/distribution_inference/attacks/distribution_inference_attack.py index 6e114c6..10c408d 100644 --- a/amulet/distribution_inference/attacks/distribution_inference.py +++ b/amulet/distribution_inference/attacks/distribution_inference_attack.py @@ -1,34 +1,10 @@ import sys -import torch -import argparse -import numpy as np import pandas as pd -from tqdm import tqdm -from torch.utils.data import TensorDataset, ConcatDataset, DataLoader +import numpy as np +import torch from sklearn.model_selection import StratifiedShuffleSplit - - -def filter(df, condition, ratio, verbose=True): - ratio = float(ratio) - qualify = np.nonzero((condition(df)).to_numpy())[0] - notqualify = np.nonzero(np.logical_not((condition(df)).to_numpy()))[0] - current_ratio = len(qualify) / (len(qualify) + len(notqualify)) - # If current ratio less than desired ratio, subsample from non-ratio - if verbose: - print("Changing ratio from %.2f to %.2f" % (current_ratio, ratio)) - if current_ratio <= ratio: - np.random.shuffle(notqualify) - if ratio < 1: - nqi = notqualify[: int(((1 - ratio) * len(qualify)) / ratio)] - return pd.concat([df.iloc[qualify], df.iloc[nqi]]) - return df.iloc[qualify] - else: - np.random.shuffle(qualify) - if ratio > 0: - qi = qualify[: int((ratio * len(notqualify)) / (1 - ratio))] - return pd.concat([df.iloc[qi], df.iloc[notqualify]]) - return df.iloc[notqualify] - +from torch.utils.data import TensorDataset, ConcatDataset, DataLoader +from tqdm import tqdm def heuristic( df, @@ -83,6 +59,26 @@ def heuristic( picked_df = pckds[np.argmin(vals)] return picked_df.reset_index(drop=True) +def filter(df, condition, ratio, verbose=True): + ratio = float(ratio) + qualify = np.nonzero((condition(df)).to_numpy())[0] + notqualify = np.nonzero(np.logical_not((condition(df)).to_numpy()))[0] + current_ratio = len(qualify) / (len(qualify) + len(notqualify)) + # If current ratio less than desired ratio, subsample from non-ratio + if verbose: + print("Changing ratio from %.2f to %.2f" % (current_ratio, ratio)) + if current_ratio <= ratio: + np.random.shuffle(notqualify) + if ratio < 1: + nqi = notqualify[: int(((1 - ratio) * len(qualify)) / ratio)] + return pd.concat([df.iloc[qualify], df.iloc[nqi]]) + return df.iloc[qualify] + else: + np.random.shuffle(qualify) + if ratio > 0: + qi = qualify[: int((ratio * len(notqualify)) / (1 - ratio))] + return pd.concat([df.iloc[qi], df.iloc[notqualify]]) + return df.iloc[notqualify] def get_filter(df, filter_prop, split, ratio, dataset_name, is_test): if dataset_name == "census": @@ -155,36 +151,12 @@ def lambda_fn(x): verbose=False, ) - -class DistributionInference: - """ - Implementation of attribute inference attack from the method from: - https://github.com/vasishtduddu/AttInfExplanations - - - Attributes: - target_model: :class:`~torch.nn.Module` - This model will be extracted. - x_train: :class:`~numpy.ndarray` - input features for training adversary' attack model - x_test: :class:`~numpy.ndarray` - input features for testing adversary' attack model - y_train: :class:`~numpy.ndarray` - class labels for train dataset - y_test: :class:`~numpy.ndarray` - class labels for test dataset - z_train: :class:`~numpy.ndarray` - sensitive attributes for training adversary' attack model (includes both "race" and "sex") - z_test: :class:`~numpy.ndarray` - sensitive attributes for training adversary' attack model (includes both "race" and "sex") - filter_prop: str - Filter: "race", "sex" - ratio1: float - ratio of distribution 1 - ratio2: float - ratio of distribution 2 - """ - +# TODO: List of issues to fix: +# - Does not use target model, instead trains many "victim" models. +# - Hardcoded values for attributes. Needs to be generalized. +# - Need to figure out a design that attacks a single target model. +# For evaluation using metrics we may need to figure out a more complex pipeline. +class DistributionInferenceAttack: def __init__( self, x_train: np.ndarray, @@ -193,19 +165,17 @@ def __init__( y_test: np.ndarray, z_train: np.ndarray, z_test: np.ndarray, - filter_prop: str, + dataset_name: str, ratio1: float, ratio2: float, - device: str, - args: argparse.Namespace, - ): + filter_prop: str, + ) -> None: + self.x_train, self.y_train, self.z_train = x_train, y_train, z_train + self.x_test, self.y_test, self.z_test = x_test, y_test, z_test + self.dataset_name = dataset_name self.ratio1 = ratio1 self.ratio2 = ratio2 - self.device = device self.filter_prop = filter_prop - self.x_train, self.y_train, self.z_train = x_train, y_train, z_train - self.x_test, self.y_test, self.z_test = x_test, y_test, z_test - self.args = args def prepare_dataset(self): x_train = pd.DataFrame(self.x_train) @@ -229,16 +199,15 @@ def s_split(this_df): return this_df.iloc[split_1], this_df.iloc[split_2] # Create train/test splits for victim/adv - self.train_df_victim, self.train_df_adv = s_split(df_train) - self.test_df_victim, self.test_df_adv = s_split(df_test) - # print(self.train_df_victim.shape,self.train_df_adv.shape) + train_df_victim, train_df_adv = s_split(df_train) + test_df_victim, test_df_adv = s_split(df_test) def prepare_one_set(TRAIN_DF, TEST_DF, split, prop_ratio, filter_prop): TRAIN_DF = get_filter( - TRAIN_DF, filter_prop, split, prop_ratio, self.args.dataset, is_test=0 + TRAIN_DF, filter_prop, split, prop_ratio, self.dataset_name, is_test=0 ) TEST_DF = get_filter( - TEST_DF, filter_prop, split, prop_ratio, self.args.dataset, is_test=1 + TEST_DF, filter_prop, split, prop_ratio, self.dataset_name, is_test=1 ) # keep the test dataset fixed (x_tr, y_tr, cols), (x_te, y_te, cols) = ( self.get_x_y(TRAIN_DF), @@ -248,8 +217,8 @@ def prepare_one_set(TRAIN_DF, TEST_DF, split, prop_ratio, filter_prop): (X_train_victim_1, y_train_victim_1), (X_test_victim_1, y_test_victim_1), _ = ( prepare_one_set( - self.train_df_victim, - self.test_df_victim, + train_df_victim, + test_df_victim, "victim", self.ratio1, self.filter_prop, @@ -260,16 +229,16 @@ def prepare_one_set(TRAIN_DF, TEST_DF, split, prop_ratio, filter_prop): (X_test_attacker_1, y_test_attacker_1), _, ) = prepare_one_set( - self.train_df_adv, - self.test_df_adv, + train_df_adv, + test_df_adv, "attacker", self.ratio1, self.filter_prop, ) (X_train_victim_2, y_train_victim_2), (X_test_victim_2, y_test_victim_2), _ = ( prepare_one_set( - self.train_df_victim, - self.test_df_victim, + train_df_victim, + test_df_victim, "victim", self.ratio2, self.filter_prop, @@ -280,61 +249,61 @@ def prepare_one_set(TRAIN_DF, TEST_DF, split, prop_ratio, filter_prop): (X_test_attacker_2, y_test_attacker_2), _, ) = prepare_one_set( - self.train_df_adv, - self.test_df_adv, + train_df_adv, + test_df_adv, "attacker", self.ratio2, self.filter_prop, ) - self.vic_traindata_1 = TensorDataset( + vic_traindata_1 = TensorDataset( torch.from_numpy(np.array(X_train_victim_1)).type(torch.float), torch.from_numpy(np.array(y_train_victim_1)).type(torch.long).squeeze(1), ) - self.att_traindata_1 = TensorDataset( + att_traindata_1 = TensorDataset( torch.from_numpy(np.array(X_train_attacker_1)).type(torch.float), torch.from_numpy(np.array(y_train_attacker_1)).type(torch.long).squeeze(1), ) - self.vic_traindata_2 = TensorDataset( + vic_traindata_2 = TensorDataset( torch.from_numpy(np.array(X_train_victim_2)).type(torch.float), torch.from_numpy(np.array(y_train_victim_2)).type(torch.long).squeeze(1), ) - self.att_traindata_2 = TensorDataset( + att_traindata_2 = TensorDataset( torch.from_numpy(np.array(X_train_attacker_2)).type(torch.float), torch.from_numpy(np.array(y_train_attacker_2)).type(torch.long).squeeze(1), ) - self.vic_testdata_1 = TensorDataset( + vic_testdata_1 = TensorDataset( torch.from_numpy(np.array(X_test_victim_1)).type(torch.float), torch.from_numpy(np.array(y_test_victim_1)).type(torch.long).squeeze(1), ) - self.att_testdata_1 = TensorDataset( + att_testdata_1 = TensorDataset( torch.from_numpy(np.array(X_test_attacker_1)).type(torch.float), torch.from_numpy(np.array(y_test_attacker_1)).type(torch.long).squeeze(1), ) - self.vic_testdata_2 = TensorDataset( + vic_testdata_2 = TensorDataset( torch.from_numpy(np.array(X_test_victim_2)).type(torch.float), torch.from_numpy(np.array(y_test_victim_2)).type(torch.long).squeeze(1), ) - self.att_testdata_2 = TensorDataset( + att_testdata_2 = TensorDataset( torch.from_numpy(np.array(X_test_attacker_2)).type(torch.float), torch.from_numpy(np.array(y_test_attacker_2)).type(torch.long).squeeze(1), ) - testdata_1 = ConcatDataset([self.att_testdata_1, self.vic_testdata_1]) - testdata_2 = ConcatDataset([self.att_testdata_2, self.vic_testdata_2]) + testdata_1 = ConcatDataset([att_testdata_1, vic_testdata_1]) + testdata_2 = ConcatDataset([att_testdata_2, vic_testdata_2]) vic_trainloader_1 = DataLoader( - dataset=self.vic_traindata_1, batch_size=256, shuffle=False + dataset=vic_traindata_1, batch_size=256, shuffle=False ) vic_trainloader_2 = DataLoader( - dataset=self.vic_traindata_2, batch_size=256, shuffle=False + dataset=vic_traindata_2, batch_size=256, shuffle=False ) att_trainloader_1 = DataLoader( - dataset=self.att_traindata_1, batch_size=256, shuffle=False + dataset=att_traindata_1, batch_size=256, shuffle=False ) att_trainloader_2 = DataLoader( - dataset=self.att_traindata_2, batch_size=256, shuffle=False + dataset=att_traindata_2, batch_size=256, shuffle=False ) test_loader_1 = DataLoader(dataset=testdata_1, batch_size=256, shuffle=False) test_loader_2 = DataLoader(dataset=testdata_2, batch_size=256, shuffle=False) @@ -347,7 +316,7 @@ def prepare_one_set(TRAIN_DF, TEST_DF, split, prop_ratio, filter_prop): test_loader_1, test_loader_2, ) - + def get_x_y(self, P): # Scale X values Y = P["y"].to_numpy() @@ -359,164 +328,4 @@ def get_x_y(self, P): # print(X["race"].value_counts()) cols = X.columns X = X.to_numpy() - return (X.astype(float), np.expand_dims(Y, 1), cols) - - def _get_kl_preds(self, ka, kb, kc1, kc2): - def sigmoid(x): - exp = np.exp(x) - return exp / (1 + exp) - - def KL(x, y): - small_eps = 1e-4 - x_ = np.clip(x, small_eps, 1 - small_eps) - y_ = np.clip(y, small_eps, 1 - small_eps) - x__, y__ = 1 - x_, 1 - y_ - first_term = x_ * (np.log(x_) - np.log(y_)) - second_term = x__ * (np.log(x__) - np.log(y__)) - return np.mean(first_term + second_term, 1) - - def _check(x): - if np.sum(np.isinf(x)) > 0 or np.sum(np.isnan(x)) > 0: - print("Invalid values:", x) - raise ValueError("Invalid values found!") - - def _pairwise_compare(x, y, xx, yy): - x_ = np.expand_dims(x, 2) - y_ = np.expand_dims(y, 2) - y_ = np.transpose(y_, (0, 2, 1)) - pairwise_comparisons = x_ - y_ - preds = np.array([z[xx, yy] for z in pairwise_comparisons]) - return preds - - ka_, kb_ = ka, kb - kc1_, kc2_ = kc1, kc2 - - ka_, kb_ = sigmoid(ka), sigmoid(kb) - kc1_, kc2_ = sigmoid(kc1), sigmoid(kc2) - - small_eps = 1e-4 - log_vals_a = np.log((small_eps + ka_) / (small_eps + 1 - ka_)) - log_vals_b = np.log((small_eps + kb_) / (small_eps + 1 - kb_)) - ordering = np.mean(np.abs(log_vals_a - log_vals_b), 0) - ordering = np.argsort(ordering)[::-1] - # Pick only first half - ordering = ordering[: len(ordering) // 2] - ka_, kb_ = ka_[:, ordering], kb_[:, ordering] - kc1_, kc2_ = kc1_[:, ordering], kc2_[:, ordering] - - # Consider all unique pairs of models - xx, yy = np.triu_indices(ka.shape[0], k=1) - - # Randomly pick pairs of models - random_pick = np.random.permutation(xx.shape[0])[: int(0.8 * xx.shape[0])] - xx, yy = xx[random_pick], yy[random_pick] - - # Compare the KL divergence between the two distributions - # For both sets of victim models - KL_vals_1_a = np.array([KL(ka_, x) for x in kc1_]) - _check(KL_vals_1_a) - KL_vals_1_b = np.array([KL(kb_, x) for x in kc1_]) - _check(KL_vals_1_b) - KL_vals_2_a = np.array([KL(ka_, x) for x in kc2_]) - _check(KL_vals_2_a) - KL_vals_2_b = np.array([KL(kb_, x) for x in kc2_]) - _check(KL_vals_2_b) - - preds_first = _pairwise_compare(KL_vals_1_a, KL_vals_1_b, xx, yy) - preds_second = _pairwise_compare(KL_vals_2_a, KL_vals_2_b, xx, yy) - - return preds_first, preds_second - - def get_preds(self, loader, models): - """ - Get predictions for given models on given data - """ - - predictions = [] - ground_truth = [] - # Accumulate all data for given loader - for data in loader: - labels = data[1] - ground_truth.append(labels.cpu().numpy()) - # if preload: - # inputs.append(features.cuda()) - ground_truth = np.concatenate(ground_truth, axis=0) - - iterator = tqdm(models, desc="Generating Predictions") - for model in iterator: - # Shift model to GPU - model = model.to(self.device) - # Make sure model is in evaluation mode - model.eval() - # Clear GPU cache - torch.cuda.empty_cache() - - with torch.no_grad(): - predictions_on_model = [] - for data in loader: - if len(data) == 2: - data_points, labels = data[0], data[1] - else: - data_points, labels, _ = data[0], data[1], data[2] - prediction = model(data_points.to(self.device)).detach() - # if not multi_class: - prediction = prediction[:, 0] - predictions_on_model.append(prediction) - predictions_on_model = torch.cat(predictions_on_model).cpu().numpy() - predictions.append(predictions_on_model) - # Shift model back to CPU - model = model.cpu() - del model - torch.cuda.empty_cache() - predictions = np.stack(predictions, 0) - torch.cuda.empty_cache() - - return predictions, ground_truth - - def attack( - self, - models_vic_1, - models_vic_2, - models_adv_1, - models_adv_2, - testloader_1, - testloader_2, - ): - preds_vic_prop1_dist1, _ = self.get_preds(testloader_1, models_vic_1) - preds_vic_prop2_dist1, _ = self.get_preds(testloader_1, models_vic_2) - preds_adv_prop1_dist1, _ = self.get_preds(testloader_1, models_adv_1) - preds_adv_prop2_dist1, _ = self.get_preds(testloader_1, models_adv_2) - - preds_vic_prop1_dist2, _ = self.get_preds(testloader_2, models_vic_1) - preds_vic_prop2_dist2, _ = self.get_preds(testloader_2, models_vic_2) - preds_adv_prop1_dist2, _ = self.get_preds(testloader_2, models_adv_1) - preds_adv_prop2_dist2, _ = self.get_preds(testloader_2, models_adv_2) - - preds_1_first, preds_1_second = self._get_kl_preds( - preds_adv_prop1_dist1, - preds_adv_prop2_dist1, - preds_vic_prop1_dist1, - preds_vic_prop2_dist1, - ) - preds_2_first, preds_2_second = self._get_kl_preds( - preds_adv_prop1_dist2, - preds_adv_prop2_dist2, - preds_vic_prop1_dist2, - preds_vic_prop2_dist2, - ) - - # Combine data - preds_first = np.concatenate((preds_1_first, preds_2_first), 1) - preds_second = np.concatenate((preds_1_second, preds_2_second), 1) - preds = np.concatenate((preds_first, preds_second)) - - # if not self.config.kl_voting: - preds -= np.min(preds, 0) - preds /= np.max(preds, 0) - - preds = np.mean(preds, 1) - gt = np.concatenate( - (np.zeros(preds_first.shape[0]), np.ones(preds_second.shape[0])) - ) - acc = 100 * np.mean((preds >= 0.5) == gt) - return acc + return (X.astype(float), np.expand_dims(Y, 1), cols) \ No newline at end of file diff --git a/amulet/distribution_inference/attacks/suri_satml_2023.py b/amulet/distribution_inference/attacks/suri_satml_2023.py new file mode 100644 index 0000000..954bbdc --- /dev/null +++ b/amulet/distribution_inference/attacks/suri_satml_2023.py @@ -0,0 +1,225 @@ +import torch +import numpy as np +import pandas as pd +from tqdm import tqdm + +from .distribution_inference_attack import DistributionInferenceAttack + +class SuriSATML2023(DistributionInferenceAttack): + """ + Implementation of attribute inference attack from the method from: + https://github.com/vasishtduddu/AttInfExplanations + + + Attributes: + x_train: :class:`~numpy.ndarray` + input features for training adversary' attack model + x_test: :class:`~numpy.ndarray` + input features for testing adversary' attack model + y_train: :class:`~numpy.ndarray` + class labels for train dataset + y_test: :class:`~numpy.ndarray` + class labels for test dataset + z_train: :class:`~numpy.ndarray` + sensitive attributes for training adversary' attack model (includes both "race" and "sex") + z_test: :class:`~numpy.ndarray` + sensitive attributes for training adversary' attack model (includes both "race" and "sex") + filter_prop: str + Filter: "race", "sex" + ratio1: float + ratio of distribution 1 + ratio2: float + ratio of distribution 2 + """ + + def __init__( + self, + x_train: np.ndarray, + x_test: np.ndarray, + y_train: np.ndarray, + y_test: np.ndarray, + z_train: np.ndarray, + z_test: np.ndarray, + filter_prop: str, + ratio1: float, + ratio2: float, + device: str, + dataset_name: str, + ): + super().__init__( + x_train, + x_test, + y_train, + y_test, + z_train, + z_test, + dataset_name, + ratio1, + ratio2, + filter_prop + ) + + self.device = device + + def _get_kl_preds(self, ka, kb, kc1, kc2): + def sigmoid(x): + exp = np.exp(x) + return exp / (1 + exp) + + def KL(x, y): + small_eps = 1e-4 + x_ = np.clip(x, small_eps, 1 - small_eps) + y_ = np.clip(y, small_eps, 1 - small_eps) + x__, y__ = 1 - x_, 1 - y_ + first_term = x_ * (np.log(x_) - np.log(y_)) + second_term = x__ * (np.log(x__) - np.log(y__)) + return np.mean(first_term + second_term, 1) + + def _check(x): + if np.sum(np.isinf(x)) > 0 or np.sum(np.isnan(x)) > 0: + print("Invalid values:", x) + raise ValueError("Invalid values found!") + + def _pairwise_compare(x, y, xx, yy): + x_ = np.expand_dims(x, 2) + y_ = np.expand_dims(y, 2) + y_ = np.transpose(y_, (0, 2, 1)) + pairwise_comparisons = x_ - y_ + preds = np.array([z[xx, yy] for z in pairwise_comparisons]) + return preds + + ka_, kb_ = ka, kb + kc1_, kc2_ = kc1, kc2 + + ka_, kb_ = sigmoid(ka), sigmoid(kb) + kc1_, kc2_ = sigmoid(kc1), sigmoid(kc2) + + small_eps = 1e-4 + log_vals_a = np.log((small_eps + ka_) / (small_eps + 1 - ka_)) + log_vals_b = np.log((small_eps + kb_) / (small_eps + 1 - kb_)) + ordering = np.mean(np.abs(log_vals_a - log_vals_b), 0) + ordering = np.argsort(ordering)[::-1] + # Pick only first half + ordering = ordering[: len(ordering) // 2] + ka_, kb_ = ka_[:, ordering], kb_[:, ordering] + kc1_, kc2_ = kc1_[:, ordering], kc2_[:, ordering] + + # Consider all unique pairs of models + xx, yy = np.triu_indices(ka.shape[0], k=1) + + # Randomly pick pairs of models + random_pick = np.random.permutation(xx.shape[0])[: int(0.8 * xx.shape[0])] + xx, yy = xx[random_pick], yy[random_pick] + + # Compare the KL divergence between the two distributions + # For both sets of victim models + KL_vals_1_a = np.array([KL(ka_, x) for x in kc1_]) + _check(KL_vals_1_a) + KL_vals_1_b = np.array([KL(kb_, x) for x in kc1_]) + _check(KL_vals_1_b) + KL_vals_2_a = np.array([KL(ka_, x) for x in kc2_]) + _check(KL_vals_2_a) + KL_vals_2_b = np.array([KL(kb_, x) for x in kc2_]) + _check(KL_vals_2_b) + + preds_first = _pairwise_compare(KL_vals_1_a, KL_vals_1_b, xx, yy) + preds_second = _pairwise_compare(KL_vals_2_a, KL_vals_2_b, xx, yy) + + return preds_first, preds_second + + def get_preds(self, loader, models): + """ + Get predictions for given models on given data + """ + + predictions = [] + ground_truth = [] + # Accumulate all data for given loader + for data in loader: + labels = data[1] + ground_truth.append(labels.cpu().numpy()) + # if preload: + # inputs.append(features.cuda()) + ground_truth = np.concatenate(ground_truth, axis=0) + + iterator = tqdm(models, desc="Generating Predictions") + for model in iterator: + # Shift model to GPU + model = model.to(self.device) + # Make sure model is in evaluation mode + model.eval() + # Clear GPU cache + torch.cuda.empty_cache() + + with torch.no_grad(): + predictions_on_model = [] + for data in loader: + if len(data) == 2: + data_points, labels = data[0], data[1] + else: + data_points, labels, _ = data[0], data[1], data[2] + prediction = model(data_points.to(self.device)).detach() + # if not multi_class: + prediction = prediction[:, 0] + predictions_on_model.append(prediction) + predictions_on_model = torch.cat(predictions_on_model).cpu().numpy() + predictions.append(predictions_on_model) + # Shift model back to CPU + model = model.cpu() + del model + torch.cuda.empty_cache() + predictions = np.stack(predictions, 0) + torch.cuda.empty_cache() + + return predictions, ground_truth + + def attack( + self, + models_vic_1, + models_vic_2, + models_adv_1, + models_adv_2, + testloader_1, + testloader_2, + ): + preds_vic_prop1_dist1, _ = self.get_preds(testloader_1, models_vic_1) + preds_vic_prop2_dist1, _ = self.get_preds(testloader_1, models_vic_2) + preds_adv_prop1_dist1, _ = self.get_preds(testloader_1, models_adv_1) + preds_adv_prop2_dist1, _ = self.get_preds(testloader_1, models_adv_2) + + preds_vic_prop1_dist2, _ = self.get_preds(testloader_2, models_vic_1) + preds_vic_prop2_dist2, _ = self.get_preds(testloader_2, models_vic_2) + preds_adv_prop1_dist2, _ = self.get_preds(testloader_2, models_adv_1) + preds_adv_prop2_dist2, _ = self.get_preds(testloader_2, models_adv_2) + + preds_1_first, preds_1_second = self._get_kl_preds( + preds_adv_prop1_dist1, + preds_adv_prop2_dist1, + preds_vic_prop1_dist1, + preds_vic_prop2_dist1, + ) + preds_2_first, preds_2_second = self._get_kl_preds( + preds_adv_prop1_dist2, + preds_adv_prop2_dist2, + preds_vic_prop1_dist2, + preds_vic_prop2_dist2, + ) + + # Combine data + preds_first = np.concatenate((preds_1_first, preds_2_first), 1) + preds_second = np.concatenate((preds_1_second, preds_2_second), 1) + preds = np.concatenate((preds_first, preds_second)) + + # if not self.config.kl_voting: + preds -= np.min(preds, 0) + preds /= np.max(preds, 0) + + + # TODO: Why is the ground truth being generated from the predictions? + preds = np.mean(preds, 1) + gt = np.concatenate( + (np.zeros(preds_first.shape[0]), np.ones(preds_second.shape[0])) + ) + acc = 100 * np.mean((preds >= 0.5) == gt) + + return acc diff --git a/examples/attack_pipelines/run_distribution_inference.py b/examples/attack_pipelines/run_distribution_inference.py index 5ff2b12..929866d 100644 --- a/examples/attack_pipelines/run_distribution_inference.py +++ b/examples/attack_pipelines/run_distribution_inference.py @@ -6,7 +6,7 @@ from pathlib import Path import torch -from amulet.distribution_inference.attacks import DistributionInference +from amulet.distribution_inference.attacks import SuriSATML2023 from amulet.utils import load_data, train_classifier, create_dir, get_accuracy from amulet.models.binary_net import BinaryNet @@ -97,7 +97,7 @@ def main(args: argparse.Namespace) -> None: if data.z_train is None or data.z_test is None: raise Exception("Dataset has no sensitive attributes") - distinf = DistributionInference( + distinf = SuriSATML2023( data.x_train, data.x_test, data.y_train, From 0a6cf6462b9a3c651c25e9d7fa62d775595b7651 Mon Sep 17 00:00:00 2001 From: Asim Waheed Date: Mon, 22 Jul 2024 16:14:34 -0400 Subject: [PATCH 2/3] Fix code quality issues --- .../attacks/distribution_inference_attack.py | 10 +++++++--- .../distribution_inference/attacks/suri_satml_2023.py | 5 ++--- .../attack_pipelines/run_distribution_inference.py | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/amulet/distribution_inference/attacks/distribution_inference_attack.py b/amulet/distribution_inference/attacks/distribution_inference_attack.py index 10c408d..2bb0d62 100644 --- a/amulet/distribution_inference/attacks/distribution_inference_attack.py +++ b/amulet/distribution_inference/attacks/distribution_inference_attack.py @@ -6,6 +6,7 @@ from torch.utils.data import TensorDataset, ConcatDataset, DataLoader from tqdm import tqdm + def heuristic( df, condition, @@ -59,6 +60,7 @@ def heuristic( picked_df = pckds[np.argmin(vals)] return picked_df.reset_index(drop=True) + def filter(df, condition, ratio, verbose=True): ratio = float(ratio) qualify = np.nonzero((condition(df)).to_numpy())[0] @@ -80,6 +82,7 @@ def filter(df, condition, ratio, verbose=True): return pd.concat([df.iloc[qi], df.iloc[notqualify]]) return df.iloc[notqualify] + def get_filter(df, filter_prop, split, ratio, dataset_name, is_test): if dataset_name == "census": if filter_prop == "sex": @@ -151,10 +154,11 @@ def lambda_fn(x): verbose=False, ) + # TODO: List of issues to fix: # - Does not use target model, instead trains many "victim" models. # - Hardcoded values for attributes. Needs to be generalized. -# - Need to figure out a design that attacks a single target model. +# - Need to figure out a design that attacks a single target model. # For evaluation using metrics we may need to figure out a more complex pipeline. class DistributionInferenceAttack: def __init__( @@ -316,7 +320,7 @@ def prepare_one_set(TRAIN_DF, TEST_DF, split, prop_ratio, filter_prop): test_loader_1, test_loader_2, ) - + def get_x_y(self, P): # Scale X values Y = P["y"].to_numpy() @@ -328,4 +332,4 @@ def get_x_y(self, P): # print(X["race"].value_counts()) cols = X.columns X = X.to_numpy() - return (X.astype(float), np.expand_dims(Y, 1), cols) \ No newline at end of file + return (X.astype(float), np.expand_dims(Y, 1), cols) diff --git a/amulet/distribution_inference/attacks/suri_satml_2023.py b/amulet/distribution_inference/attacks/suri_satml_2023.py index 954bbdc..c28f79a 100644 --- a/amulet/distribution_inference/attacks/suri_satml_2023.py +++ b/amulet/distribution_inference/attacks/suri_satml_2023.py @@ -1,10 +1,10 @@ import torch import numpy as np -import pandas as pd from tqdm import tqdm from .distribution_inference_attack import DistributionInferenceAttack + class SuriSATML2023(DistributionInferenceAttack): """ Implementation of attribute inference attack from the method from: @@ -56,7 +56,7 @@ def __init__( dataset_name, ratio1, ratio2, - filter_prop + filter_prop, ) self.device = device @@ -214,7 +214,6 @@ def attack( preds -= np.min(preds, 0) preds /= np.max(preds, 0) - # TODO: Why is the ground truth being generated from the predictions? preds = np.mean(preds, 1) gt = np.concatenate( diff --git a/examples/attack_pipelines/run_distribution_inference.py b/examples/attack_pipelines/run_distribution_inference.py index 929866d..37db1dc 100644 --- a/examples/attack_pipelines/run_distribution_inference.py +++ b/examples/attack_pipelines/run_distribution_inference.py @@ -108,7 +108,7 @@ def main(args: argparse.Namespace) -> None: args.ratio1, args.ratio2, args.device, - args, + args.dataset, ) ( vic_trainloader_1, From 58d3d7549fbdab56b1fde2fee9f62872519b82b8 Mon Sep 17 00:00:00 2001 From: Asim Waheed Date: Fri, 26 Jul 2024 18:00:12 -0400 Subject: [PATCH 3/3] Add some type hints --- .../attacks/distribution_inference_attack.py | 78 +++++++++---------- .../attacks/suri_satml_2023.py | 58 +++++++------- 2 files changed, 68 insertions(+), 68 deletions(-) diff --git a/amulet/distribution_inference/attacks/distribution_inference_attack.py b/amulet/distribution_inference/attacks/distribution_inference_attack.py index 2bb0d62..2cab105 100644 --- a/amulet/distribution_inference/attacks/distribution_inference_attack.py +++ b/amulet/distribution_inference/attacks/distribution_inference_attack.py @@ -1,3 +1,5 @@ +from typing import Callable + import sys import pandas as pd import numpy as np @@ -8,16 +10,15 @@ def heuristic( - df, - condition, - ratio, - cwise_sample, - class_imbalance=2.0, - n_tries=1000, - class_col="label", - verbose=True, + df: pd.DataFrame, + condition: Callable[[pd.DataFrame], pd.DataFrame], + ratio: float, + cwise_sample: int | None, + class_imbalance: float = 2.0, + n_tries: int = 1000, + class_col: str = "label", + verbose: bool = True, ): - ratio = float(ratio) vals, pckds = [], [] iterator = range(n_tries) if verbose: @@ -61,8 +62,12 @@ def heuristic( return picked_df.reset_index(drop=True) -def filter(df, condition, ratio, verbose=True): - ratio = float(ratio) +def filter( + df: pd.DataFrame, + condition: Callable[[pd.DataFrame], pd.DataFrame], + ratio: float, + verbose: bool = True, +): qualify = np.nonzero((condition(df)).to_numpy())[0] notqualify = np.nonzero(np.logical_not((condition(df)).to_numpy()))[0] current_ratio = len(qualify) / (len(qualify) + len(notqualify)) @@ -83,20 +88,28 @@ def filter(df, condition, ratio, verbose=True): return df.iloc[notqualify] -def get_filter(df, filter_prop, split, ratio, dataset_name, is_test): - if dataset_name == "census": - if filter_prop == "sex": - - def lambda_fn(x): - return x["sex"] == 1 - elif filter_prop == "race": +def get_filter( + df: pd.DataFrame, + filter_prop: str, + split: str, + ratio: float, + dataset_name: str, + is_test: int, +): + def lambda_fn_sex(x: pd.DataFrame): + return x["sex"] == 1 - def lambda_fn(x): - return x["race"] == 1 - else: - print("Incorrect filter prop") - sys.exit() + def lambda_fn_race(x: pd.DataFrame): + return x["race"] == 1 + if filter_prop == "sex": + lambda_fn = lambda_fn_sex + elif filter_prop == "race": + lambda_fn = lambda_fn_race + else: + print("Incorrect filter prop") + sys.exit() + if dataset_name == "census": prop_wise_subsample_sizes = { "attacker": { "sex": (1100, 500), @@ -118,20 +131,7 @@ def lambda_fn(x): class_col="y", verbose=False, ) - else: - if filter_prop == "sex": - - def lambda_fn(x): - return x["sex"] == 1 - elif filter_prop == "race": - - def lambda_fn(x): - return x["race"] == 1 - else: - print("Incorrect filter prop") - sys.exit() - prop_wise_subsample_sizes = { "attacker": { "sex": (2200, 1200), @@ -214,8 +214,8 @@ def prepare_one_set(TRAIN_DF, TEST_DF, split, prop_ratio, filter_prop): TEST_DF, filter_prop, split, prop_ratio, self.dataset_name, is_test=1 ) # keep the test dataset fixed (x_tr, y_tr, cols), (x_te, y_te, cols) = ( - self.get_x_y(TRAIN_DF), - self.get_x_y(TEST_DF), + self.__get_x_y(TRAIN_DF), + self.__get_x_y(TEST_DF), ) return (x_tr, y_tr), (x_te, y_te), cols @@ -321,7 +321,7 @@ def prepare_one_set(TRAIN_DF, TEST_DF, split, prop_ratio, filter_prop): test_loader_2, ) - def get_x_y(self, P): + def __get_x_y(self, P: pd.DataFrame): # Scale X values Y = P["y"].to_numpy() X = P.drop(columns="y", axis=1) diff --git a/amulet/distribution_inference/attacks/suri_satml_2023.py b/amulet/distribution_inference/attacks/suri_satml_2023.py index c28f79a..56ceab7 100644 --- a/amulet/distribution_inference/attacks/suri_satml_2023.py +++ b/amulet/distribution_inference/attacks/suri_satml_2023.py @@ -61,12 +61,12 @@ def __init__( self.device = device - def _get_kl_preds(self, ka, kb, kc1, kc2): - def sigmoid(x): + def __get_kl_preds(self, ka, kb, kc1, kc2): + def __sigmoid(x): exp = np.exp(x) return exp / (1 + exp) - def KL(x, y): + def __KL(x, y): small_eps = 1e-4 x_ = np.clip(x, small_eps, 1 - small_eps) y_ = np.clip(y, small_eps, 1 - small_eps) @@ -75,12 +75,12 @@ def KL(x, y): second_term = x__ * (np.log(x__) - np.log(y__)) return np.mean(first_term + second_term, 1) - def _check(x): + def __check(x): if np.sum(np.isinf(x)) > 0 or np.sum(np.isnan(x)) > 0: print("Invalid values:", x) raise ValueError("Invalid values found!") - def _pairwise_compare(x, y, xx, yy): + def __pairwise_compare(x, y, xx, yy): x_ = np.expand_dims(x, 2) y_ = np.expand_dims(y, 2) y_ = np.transpose(y_, (0, 2, 1)) @@ -91,8 +91,8 @@ def _pairwise_compare(x, y, xx, yy): ka_, kb_ = ka, kb kc1_, kc2_ = kc1, kc2 - ka_, kb_ = sigmoid(ka), sigmoid(kb) - kc1_, kc2_ = sigmoid(kc1), sigmoid(kc2) + ka_, kb_ = __sigmoid(ka), __sigmoid(kb) + kc1_, kc2_ = __sigmoid(kc1), __sigmoid(kc2) small_eps = 1e-4 log_vals_a = np.log((small_eps + ka_) / (small_eps + 1 - ka_)) @@ -113,21 +113,21 @@ def _pairwise_compare(x, y, xx, yy): # Compare the KL divergence between the two distributions # For both sets of victim models - KL_vals_1_a = np.array([KL(ka_, x) for x in kc1_]) - _check(KL_vals_1_a) - KL_vals_1_b = np.array([KL(kb_, x) for x in kc1_]) - _check(KL_vals_1_b) - KL_vals_2_a = np.array([KL(ka_, x) for x in kc2_]) - _check(KL_vals_2_a) - KL_vals_2_b = np.array([KL(kb_, x) for x in kc2_]) - _check(KL_vals_2_b) - - preds_first = _pairwise_compare(KL_vals_1_a, KL_vals_1_b, xx, yy) - preds_second = _pairwise_compare(KL_vals_2_a, KL_vals_2_b, xx, yy) + KL_vals_1_a = np.array([__KL(ka_, x) for x in kc1_]) + __check(KL_vals_1_a) + KL_vals_1_b = np.array([__KL(kb_, x) for x in kc1_]) + __check(KL_vals_1_b) + KL_vals_2_a = np.array([__KL(ka_, x) for x in kc2_]) + __check(KL_vals_2_a) + KL_vals_2_b = np.array([__KL(kb_, x) for x in kc2_]) + __check(KL_vals_2_b) + + preds_first = __pairwise_compare(KL_vals_1_a, KL_vals_1_b, xx, yy) + preds_second = __pairwise_compare(KL_vals_2_a, KL_vals_2_b, xx, yy) return preds_first, preds_second - def get_preds(self, loader, models): + def __get_preds(self, loader, models): """ Get predictions for given models on given data """ @@ -182,23 +182,23 @@ def attack( testloader_1, testloader_2, ): - preds_vic_prop1_dist1, _ = self.get_preds(testloader_1, models_vic_1) - preds_vic_prop2_dist1, _ = self.get_preds(testloader_1, models_vic_2) - preds_adv_prop1_dist1, _ = self.get_preds(testloader_1, models_adv_1) - preds_adv_prop2_dist1, _ = self.get_preds(testloader_1, models_adv_2) + preds_vic_prop1_dist1, _ = self.__get_preds(testloader_1, models_vic_1) + preds_vic_prop2_dist1, _ = self.__get_preds(testloader_1, models_vic_2) + preds_adv_prop1_dist1, _ = self.__get_preds(testloader_1, models_adv_1) + preds_adv_prop2_dist1, _ = self.__get_preds(testloader_1, models_adv_2) - preds_vic_prop1_dist2, _ = self.get_preds(testloader_2, models_vic_1) - preds_vic_prop2_dist2, _ = self.get_preds(testloader_2, models_vic_2) - preds_adv_prop1_dist2, _ = self.get_preds(testloader_2, models_adv_1) - preds_adv_prop2_dist2, _ = self.get_preds(testloader_2, models_adv_2) + preds_vic_prop1_dist2, _ = self.__get_preds(testloader_2, models_vic_1) + preds_vic_prop2_dist2, _ = self.__get_preds(testloader_2, models_vic_2) + preds_adv_prop1_dist2, _ = self.__get_preds(testloader_2, models_adv_1) + preds_adv_prop2_dist2, _ = self.__get_preds(testloader_2, models_adv_2) - preds_1_first, preds_1_second = self._get_kl_preds( + preds_1_first, preds_1_second = self.__get_kl_preds( preds_adv_prop1_dist1, preds_adv_prop2_dist1, preds_vic_prop1_dist1, preds_vic_prop2_dist1, ) - preds_2_first, preds_2_second = self._get_kl_preds( + preds_2_first, preds_2_second = self.__get_kl_preds( preds_adv_prop1_dist2, preds_adv_prop2_dist2, preds_vic_prop1_dist2,