From 0384704ecbde4c3fdabf5bc709de2f1bc6d4987c Mon Sep 17 00:00:00 2001 From: Tyler Blalock <122387381+T-Breezy444@users.noreply.github.com> Date: Thu, 3 Oct 2024 10:43:00 -0400 Subject: [PATCH 01/24] Create attacks.py --- pygip/data_free_attack/attacks.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 pygip/data_free_attack/attacks.py diff --git a/pygip/data_free_attack/attacks.py b/pygip/data_free_attack/attacks.py new file mode 100644 index 0000000..3c455f6 --- /dev/null +++ b/pygip/data_free_attack/attacks.py @@ -0,0 +1,11 @@ +from .generator import GraphGenerator +from .utils import GraphNeuralNetworkMetric + +class DataFreeModelExtractionAttack: + def __init__(self, victim_model, graph, features, labels, attack_type=0): + self.victim_model = victim_model + self.graph = graph + self.features = features + self.labels = labels + self.attack_type = attack_type + self.generator = GraphGenerator(features.shape[1], graph.number_of_nodes()) From 8cc6815df89ea4b09ea3f83b9eec7ea84edc4ee8 Mon Sep 17 00:00:00 2001 From: Tyler Blalock <122387381+T-Breezy444@users.noreply.github.com> Date: Thu, 3 Oct 2024 10:51:34 -0400 Subject: [PATCH 02/24] Update attacks.py --- pygip/data_free_attack/attacks.py | 311 +++++++++++++++++++++++++++++- 1 file changed, 302 insertions(+), 9 deletions(-) diff --git a/pygip/data_free_attack/attacks.py b/pygip/data_free_attack/attacks.py index 3c455f6..d3da233 100644 --- a/pygip/data_free_attack/attacks.py +++ b/pygip/data_free_attack/attacks.py @@ -1,11 +1,304 @@ -from .generator import GraphGenerator -from .utils import GraphNeuralNetworkMetric +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch_geometric.nn import GCNConv -class DataFreeModelExtractionAttack: - def __init__(self, victim_model, graph, features, labels, attack_type=0): +class GraphGenerator(nn.Module): + def __init__(self, noise_dim, num_nodes, feature_dim, generator_type='cosine', threshold=0.1): + super(GraphGenerator, 
self).__init__() + self.noise_dim = noise_dim + self.num_nodes = num_nodes + self.feature_dim = feature_dim + self.generator_type = generator_type + self.threshold = threshold + + self.feature_gen = nn.Sequential( + nn.Linear(noise_dim, 128), + nn.ReLU(), + nn.Linear(128, 256), + nn.ReLU(), + nn.Linear(256, num_nodes * feature_dim), + nn.Tanh() + ) + + if generator_type == 'full_param': + self.structure_gen = nn.Sequential( + nn.Linear(noise_dim, 128), + nn.ReLU(), + nn.Linear(128, 256), + nn.ReLU(), + nn.Linear(256, num_nodes * num_nodes), + nn.Sigmoid() + ) + + def forward(self, z): + features = self.feature_gen(z).view(self.num_nodes, self.feature_dim) + + if self.generator_type == 'cosine': + adj = self.cosine_similarity_generator(features) + elif self.generator_type == 'full_param': + adj = self.full_param_generator(z) + else: + raise ValueError("Invalid generator type. Choose 'cosine' or 'full_param'.") + + adj = adj / adj.sum(1, keepdim=True).clamp(min=1) + + return features, adj + + def cosine_similarity_generator(self, features): + norm_features = F.normalize(features, p=2, dim=1) + adj = torch.mm(norm_features, norm_features.t()) + adj = (adj > self.threshold).float() + adj = adj * (1 - torch.eye(self.num_nodes, device=adj.device)) + return adj + + def full_param_generator(self, z): + adj = self.structure_gen(z).view(self.num_nodes, self.num_nodes) + adj = (adj + adj.t()) / 2 + adj = adj * (1 - torch.eye(self.num_nodes, device=adj.device)) + return adj + + def adj_to_edge_index(self, adj): + return adj.nonzero().t() + +class BaseAttack: + def __init__(self, generator, surrogate_model, victim_model, device, + noise_dim, num_nodes, feature_dim, + generator_lr=1e-6, surrogate_lr=0.001, + n_generator_steps=2, n_surrogate_steps=5): + self.generator = generator + self.surrogate_model = surrogate_model + self.victim_model = victim_model + self.device = device + self.noise_dim = noise_dim + self.num_nodes = num_nodes + self.feature_dim = feature_dim + + 
self.generator_optimizer = torch.optim.Adam(self.generator.parameters(), lr=generator_lr) + self.surrogate_optimizer = torch.optim.Adam(self.surrogate_model.parameters(), lr=surrogate_lr) + + self.criterion = nn.CrossEntropyLoss() + self.n_generator_steps = n_generator_steps + self.n_surrogate_steps = n_surrogate_steps + + def generate_graph(self): + z = torch.randn(1, self.noise_dim).to(self.device) + features, adj = self.generator(z) + edge_index = self.generator.adj_to_edge_index(adj) + return features, edge_index + + def train_generator(self): + raise NotImplementedError + + def train_surrogate(self): + self.generator.eval() + self.surrogate_model.train() + + total_loss = 0 + for _ in range(self.n_surrogate_steps): + self.surrogate_optimizer.zero_grad() + + features, edge_index = self.generate_graph() + + with torch.no_grad(): + victim_output = self.victim_model(features, edge_index) + surrogate_output = self.surrogate_model(features, edge_index) + + loss = self.criterion(surrogate_output, victim_output.argmax(dim=1)) + + loss.backward() + torch.nn.utils.clip_grad_norm_(self.surrogate_model.parameters(), max_norm=1.0) + self.surrogate_optimizer.step() + + total_loss += loss.item() + + return total_loss / self.n_surrogate_steps + + def attack(self, num_queries): + generator_losses = [] + surrogate_losses = [] + + for _ in range(num_queries): + gen_loss = self.train_generator() + surr_loss = self.train_surrogate() + + generator_losses.append(gen_loss) + surrogate_losses.append(surr_loss) + + return self.surrogate_model, generator_losses, surrogate_losses + +class TypeIAttack(BaseAttack): + def train_generator(self): + self.generator.train() + self.surrogate_model.eval() + + total_loss = 0 + for _ in range(self.n_generator_steps): + self.generator_optimizer.zero_grad() + + features, edge_index = self.generate_graph() + + with torch.no_grad(): + victim_output = self.victim_model(features, edge_index) + surrogate_output = self.surrogate_model(features, edge_index) + 
+ loss = -self.criterion(surrogate_output, victim_output.argmax(dim=1)) + + epsilon = 1e-6 + num_directions = 2 + estimated_gradient = torch.zeros_like(features) + + for _ in range(num_directions): + u = torch.randn_like(features) + perturbed_features = features + epsilon * u + + with torch.no_grad(): + perturbed_victim_output = self.victim_model(perturbed_features, edge_index) + perturbed_surrogate_output = self.surrogate_model(perturbed_features, edge_index) + perturbed_loss = -self.criterion(perturbed_surrogate_output, perturbed_victim_output.argmax(dim=1)) + + estimated_gradient += (perturbed_loss - loss) / epsilon * u + + estimated_gradient /= num_directions + features.grad = estimated_gradient + + self.generator_optimizer.step() + total_loss += loss.item() + + return total_loss / self.n_generator_steps + +class TypeIIAttack(BaseAttack): + def train_generator(self): + self.generator.train() + self.surrogate_model.eval() + + total_loss = 0 + for _ in range(self.n_generator_steps): + self.generator_optimizer.zero_grad() + + features, edge_index = self.generate_graph() + + with torch.no_grad(): + victim_output = self.victim_model(features, edge_index) + surrogate_output = self.surrogate_model(features, edge_index) + + loss = -self.criterion(surrogate_output, victim_output.argmax(dim=1)) + loss.backward() + + self.generator_optimizer.step() + total_loss += loss.item() + + return total_loss / self.n_generator_steps + +class TypeIIIAttack: + def __init__(self, generator, surrogate_model1, surrogate_model2, victim_model, device, + noise_dim, num_nodes, feature_dim, + generator_lr=1e-6, surrogate_lr=0.001, + n_generator_steps=2, n_surrogate_steps=5): + self.generator = generator + self.surrogate_model1 = surrogate_model1 + self.surrogate_model2 = surrogate_model2 self.victim_model = victim_model - self.graph = graph - self.features = features - self.labels = labels - self.attack_type = attack_type - self.generator = GraphGenerator(features.shape[1], 
graph.number_of_nodes()) + self.device = device + self.noise_dim = noise_dim + self.num_nodes = num_nodes + self.feature_dim = feature_dim + + self.generator_optimizer = torch.optim.Adam(self.generator.parameters(), lr=generator_lr) + self.surrogate_optimizer1 = torch.optim.Adam(self.surrogate_model1.parameters(), lr=surrogate_lr) + self.surrogate_optimizer2 = torch.optim.Adam(self.surrogate_model2.parameters(), lr=surrogate_lr) + + self.criterion = nn.CrossEntropyLoss() + self.n_generator_steps = n_generator_steps + self.n_surrogate_steps = n_surrogate_steps + + def generate_graph(self): + z = torch.randn(1, self.noise_dim).to(self.device) + features, adj = self.generator(z) + edge_index = self.generator.adj_to_edge_index(adj) + return features, edge_index + + def train_generator(self): + self.generator.train() + self.surrogate_model1.eval() + self.surrogate_model2.eval() + + total_loss = 0 + for _ in range(self.n_generator_steps): + self.generator_optimizer.zero_grad() + + features, edge_index = self.generate_graph() + + surrogate_output1 = self.surrogate_model1(features, edge_index) + surrogate_output2 = self.surrogate_model2(features, edge_index) + + loss = -torch.mean(torch.std(torch.stack([surrogate_output1, surrogate_output2]), dim=0)) + loss.backward() + + self.generator_optimizer.step() + total_loss += loss.item() + + return total_loss / self.n_generator_steps + + def train_surrogate(self): + self.generator.eval() + self.surrogate_model1.train() + self.surrogate_model2.train() + + total_loss = 0 + for _ in range(self.n_surrogate_steps): + self.surrogate_optimizer1.zero_grad() + self.surrogate_optimizer2.zero_grad() + + features, edge_index = self.generate_graph() + + with torch.no_grad(): + victim_output = self.victim_model(features, edge_index) + surrogate_output1 = self.surrogate_model1(features, edge_index) + surrogate_output2 = self.surrogate_model2(features, edge_index) + + loss1 = self.criterion(surrogate_output1, victim_output.argmax(dim=1)) + loss2 
= self.criterion(surrogate_output2, victim_output.argmax(dim=1)) + + loss1.backward() + loss2.backward() + + torch.nn.utils.clip_grad_norm_(self.surrogate_model1.parameters(), max_norm=1.0) + torch.nn.utils.clip_grad_norm_(self.surrogate_model2.parameters(), max_norm=1.0) + + self.surrogate_optimizer1.step() + self.surrogate_optimizer2.step() + + total_loss += (loss1.item() + loss2.item()) / 2 + + return total_loss / self.n_surrogate_steps + + def attack(self, num_queries): + generator_losses = [] + surrogate_losses = [] + + for _ in range(num_queries): + gen_loss = self.train_generator() + surr_loss = self.train_surrogate() + + generator_losses.append(gen_loss) + surrogate_losses.append(surr_loss) + + return (self.surrogate_model1, self.surrogate_model2), generator_losses, surrogate_losses + +def run_attack(attack_type, generator, surrogate_model, victim_model, num_queries, device, + noise_dim, num_nodes, feature_dim): + if attack_type == 1: + attack = TypeIAttack(generator, surrogate_model, victim_model, device, + noise_dim, num_nodes, feature_dim) + elif attack_type == 2: + attack = TypeIIAttack(generator, surrogate_model, victim_model, device, + noise_dim, num_nodes, feature_dim) + elif attack_type == 3: + surrogate_model2 = type(surrogate_model)(surrogate_model.in_channels, surrogate_model.hidden_channels, surrogate_model.out_channels).to(device) + attack = TypeIIIAttack(generator, surrogate_model, surrogate_model2, victim_model, device, + noise_dim, num_nodes, feature_dim) + else: + raise ValueError("Invalid attack type. 
Choose 1, 2, or 3.") + + return attack.attack(num_queries) From d78809c9659d7dbe486e93829283a2b25f77bf15 Mon Sep 17 00:00:00 2001 From: Tyler Blalock <122387381+T-Breezy444@users.noreply.github.com> Date: Thu, 3 Oct 2024 10:54:49 -0400 Subject: [PATCH 03/24] Create example.py --- pygip/data_free_attack/example.py | 102 ++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 pygip/data_free_attack/example.py diff --git a/pygip/data_free_attack/example.py b/pygip/data_free_attack/example.py new file mode 100644 index 0000000..c809bf9 --- /dev/null +++ b/pygip/data_free_attack/example.py @@ -0,0 +1,102 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch_geometric.datasets import Planetoid +from torch_geometric.transforms import NormalizeFeatures +from torch_geometric.nn import GCNConv + +# Import the STEALGNN components +from attacks import GraphGenerator, run_attack + +class GCN(nn.Module): + def __init__(self, in_channels, hidden_channels, out_channels): + super(GCN, self).__init__() + self.conv1 = GCNConv(in_channels, hidden_channels) + self.conv2 = GCNConv(hidden_channels, out_channels) + self.in_channels = in_channels + self.hidden_channels = hidden_channels + self.out_channels = out_channels + + def forward(self, x, edge_index): + x = F.relu(self.conv1(x, edge_index)) + x = F.dropout(x, p=0.5, training=self.training) + x = self.conv2(x, edge_index) + return F.log_softmax(x, dim=1) + +# Set up device +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + +# Load Cora dataset +dataset = Planetoid(root='/tmp/Cora', name='Cora', transform=NormalizeFeatures()) +data = dataset[0].to(device) + +# Set up victim model (GCN as described in the paper) +victim_model = GCN(dataset.num_features, 128, dataset.num_classes).to(device) + +# Train victim model (you would typically do this separately) +optimizer = torch.optim.Adam(victim_model.parameters(), lr=0.01, weight_decay=5e-4) 
+victim_model.train() +for epoch in range(200): + optimizer.zero_grad() + out = victim_model(data.x, data.edge_index) + loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask]) + loss.backward() + optimizer.step() + +# Set up generator and surrogate model for the attack +noise_dim = 32 +num_nodes = 500 # As per the paper +feature_dim = dataset.num_features +generator = GraphGenerator(noise_dim, num_nodes, feature_dim, generator_type='cosine').to(device) +surrogate_model = GCN(feature_dim, 128, dataset.num_classes).to(device) + +# Run Type 3 attack +num_queries = 400 # As per the paper +attack_type = 3 + +trained_surrogate, generator_losses, surrogate_losses = run_attack( + attack_type, generator, surrogate_model, victim_model, num_queries, device, + noise_dim, num_nodes, feature_dim +) + +# Evaluate the attack +victim_model.eval() +if isinstance(trained_surrogate, tuple): + surrogate_model1, surrogate_model2 = trained_surrogate + surrogate_model1.eval() + surrogate_model2.eval() +else: + surrogate_model = trained_surrogate + surrogate_model.eval() + +with torch.no_grad(): + victim_output = victim_model(data.x, data.edge_index) + if isinstance(trained_surrogate, tuple): + surrogate_output1 = surrogate_model1(data.x, data.edge_index) + surrogate_output2 = surrogate_model2(data.x, data.edge_index) + surrogate_output = (surrogate_output1 + surrogate_output2) / 2 + else: + surrogate_output = surrogate_model(data.x, data.edge_index) + + victim_preds = victim_output.argmax(dim=1) + surrogate_preds = surrogate_output.argmax(dim=1) + +accuracy = (surrogate_preds[data.test_mask] == data.y[data.test_mask]).float().mean() +fidelity = (surrogate_preds == victim_preds).float().mean() + +print(f"Attack Type: {attack_type}") +print(f"Accuracy: {accuracy.item():.4f}") +print(f"Fidelity: {fidelity.item():.4f}") + +# Plot losses +import matplotlib.pyplot as plt + +plt.figure(figsize=(10, 5)) +plt.plot(generator_losses, label='Generator Loss') +plt.plot(surrogate_losses, 
label='Surrogate Loss') +plt.title(f'Losses over time - Type {attack_type} Attack') +plt.xlabel('Query') +plt.ylabel('Loss') +plt.legend() +plt.savefig(f'losses_type{attack_type}.png') +plt.close() From 445b04b177d1ee9839383ea617615a03c4e7405c Mon Sep 17 00:00:00 2001 From: Tyler Blalock <122387381+T-Breezy444@users.noreply.github.com> Date: Sun, 6 Oct 2024 20:54:20 -0400 Subject: [PATCH 04/24] Update example.py More interactive and works with pre set models from the paper --- pygip/data_free_attack/example.py | 262 +++++++++++++++++++----------- 1 file changed, 164 insertions(+), 98 deletions(-) diff --git a/pygip/data_free_attack/example.py b/pygip/data_free_attack/example.py index c809bf9..10ef84e 100644 --- a/pygip/data_free_attack/example.py +++ b/pygip/data_free_attack/example.py @@ -1,102 +1,168 @@ +import sys +import argparse import torch import torch.nn as nn -import torch.nn.functional as F -from torch_geometric.datasets import Planetoid +import torch.optim as optim +from torch_geometric.datasets import Planetoid, Amazon +from ogb.nodeproppred import PygNodePropPredDataset from torch_geometric.transforms import NormalizeFeatures -from torch_geometric.nn import GCNConv - -# Import the STEALGNN components -from attacks import GraphGenerator, run_attack - -class GCN(nn.Module): - def __init__(self, in_channels, hidden_channels, out_channels): - super(GCN, self).__init__() - self.conv1 = GCNConv(in_channels, hidden_channels) - self.conv2 = GCNConv(hidden_channels, out_channels) - self.in_channels = in_channels - self.hidden_channels = hidden_channels - self.out_channels = out_channels - - def forward(self, x, edge_index): - x = F.relu(self.conv1(x, edge_index)) - x = F.dropout(x, p=0.5, training=self.training) - x = self.conv2(x, edge_index) - return F.log_softmax(x, dim=1) - -# Set up device -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - -# Load Cora dataset -dataset = Planetoid(root='/tmp/Cora', name='Cora', 
transform=NormalizeFeatures()) -data = dataset[0].to(device) - -# Set up victim model (GCN as described in the paper) -victim_model = GCN(dataset.num_features, 128, dataset.num_classes).to(device) - -# Train victim model (you would typically do this separately) -optimizer = torch.optim.Adam(victim_model.parameters(), lr=0.01, weight_decay=5e-4) -victim_model.train() -for epoch in range(200): - optimizer.zero_grad() - out = victim_model(data.x, data.edge_index) - loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask]) - loss.backward() - optimizer.step() - -# Set up generator and surrogate model for the attack -noise_dim = 32 -num_nodes = 500 # As per the paper -feature_dim = dataset.num_features -generator = GraphGenerator(noise_dim, num_nodes, feature_dim, generator_type='cosine').to(device) -surrogate_model = GCN(feature_dim, 128, dataset.num_classes).to(device) - -# Run Type 3 attack -num_queries = 400 # As per the paper -attack_type = 3 - -trained_surrogate, generator_losses, surrogate_losses = run_attack( - attack_type, generator, surrogate_model, victim_model, num_queries, device, - noise_dim, num_nodes, feature_dim -) - -# Evaluate the attack -victim_model.eval() -if isinstance(trained_surrogate, tuple): - surrogate_model1, surrogate_model2 = trained_surrogate - surrogate_model1.eval() - surrogate_model2.eval() -else: - surrogate_model = trained_surrogate - surrogate_model.eval() - -with torch.no_grad(): - victim_output = victim_model(data.x, data.edge_index) - if isinstance(trained_surrogate, tuple): - surrogate_output1 = surrogate_model1(data.x, data.edge_index) - surrogate_output2 = surrogate_model2(data.x, data.edge_index) - surrogate_output = (surrogate_output1 + surrogate_output2) / 2 +from torch_geometric.utils import to_undirected +import numpy as np + +from stealgnn import GraphGenerator, SurrogateModel, TypeIAttack, TypeIIAttack, TypeIIIAttack, evaluate_models + +def create_masks(num_nodes, train_ratio=0.6, val_ratio=0.2): + indices = 
np.random.permutation(num_nodes) + train_size = int(num_nodes * train_ratio) + val_size = int(num_nodes * val_ratio) + + train_mask = torch.zeros(num_nodes, dtype=torch.bool) + val_mask = torch.zeros(num_nodes, dtype=torch.bool) + test_mask = torch.zeros(num_nodes, dtype=torch.bool) + + train_mask[indices[:train_size]] = True + val_mask[indices[train_size:train_size+val_size]] = True + test_mask[indices[train_size+val_size:]] = True + + return train_mask, val_mask, test_mask + +def load_dataset_and_create_victim_model(dataset_name, device): + if dataset_name == 'cora': + dataset = Planetoid(root='/tmp/Cora', name='Cora', transform=NormalizeFeatures()) + data = dataset[0].to(device) + elif dataset_name == 'computers': + dataset = Amazon(root='/tmp/Amazon', name='Computers', transform=NormalizeFeatures()) + data = dataset[0].to(device) + data.edge_index = to_undirected(data.edge_index) + train_mask, val_mask, test_mask = create_masks(data.num_nodes) + data.train_mask, data.val_mask, data.test_mask = train_mask.to(device), val_mask.to(device), test_mask.to(device) + elif dataset_name == 'pubmed': + dataset = Planetoid(root='/tmp/Pubmed', name='Pubmed', transform=NormalizeFeatures()) + data = dataset[0].to(device) + elif dataset_name == 'ogb-arxiv': + dataset = PygNodePropPredDataset(name='ogbn-arxiv', transform=NormalizeFeatures()) + data = dataset[0].to(device) + split_idx = dataset.get_idx_split() + data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool) + data.val_mask = torch.zeros(data.num_nodes, dtype=torch.bool) + data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool) + data.train_mask[split_idx['train']] = True + data.val_mask[split_idx['valid']] = True + data.test_mask[split_idx['test']] = True + data.train_mask, data.val_mask, data.test_mask = data.train_mask.to(device), data.val_mask.to(device), data.test_mask.to(device) else: - surrogate_output = surrogate_model(data.x, data.edge_index) - - victim_preds = victim_output.argmax(dim=1) - 
surrogate_preds = surrogate_output.argmax(dim=1) - -accuracy = (surrogate_preds[data.test_mask] == data.y[data.test_mask]).float().mean() -fidelity = (surrogate_preds == victim_preds).float().mean() - -print(f"Attack Type: {attack_type}") -print(f"Accuracy: {accuracy.item():.4f}") -print(f"Fidelity: {fidelity.item():.4f}") - -# Plot losses -import matplotlib.pyplot as plt - -plt.figure(figsize=(10, 5)) -plt.plot(generator_losses, label='Generator Loss') -plt.plot(surrogate_losses, label='Surrogate Loss') -plt.title(f'Losses over time - Type {attack_type} Attack') -plt.xlabel('Query') -plt.ylabel('Loss') -plt.legend() -plt.savefig(f'losses_type{attack_type}.png') -plt.close() + raise ValueError("Invalid dataset name. Choose 'cora', 'computers', 'pubmed', or 'ogb-arxiv'.") + + input_dim, hidden_dim, output_dim = data.num_features, 16, dataset.num_classes + victim_model = SurrogateModel(input_dim, hidden_dim, output_dim).to(device) + return dataset, data, victim_model + +def train_victim_model(victim_model, data, dataset_name, epochs=200, lr=0.01, weight_decay=5e-4): + optimizer = optim.Adam(victim_model.parameters(), lr=lr, weight_decay=weight_decay) + criterion = nn.NLLLoss() + + for epoch in range(epochs): + victim_model.train() + optimizer.zero_grad() + out = victim_model(data.x, data.edge_index) + loss = criterion(out[data.train_mask], data.y[data.train_mask]) + loss.backward() + optimizer.step() + + if (epoch + 1) % 10 == 0: + victim_model.eval() + with torch.no_grad(): + val_out = victim_model(data.x, data.edge_index) + val_loss = criterion(val_out[data.val_mask], data.y[data.val_mask]) + val_acc = (val_out[data.val_mask].argmax(dim=1) == data.y[data.val_mask]).float().mean() + print(f'Epoch {epoch+1}/{epochs}, Train Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}, Val Acc: {val_acc.item():.4f}') + +def run_attack(attack_type, dataset_name, victim_model, data, dataset, device, params): + generator = GraphGenerator(params['noise_dim'], 
params['num_nodes'], data.num_features, + generator_type=params['generator_type']).to(device) + surrogate_model = SurrogateModel(data.num_features, params['hidden_dim'], dataset.num_classes).to(device) + + if attack_type == 1: + attack = TypeIAttack(generator, surrogate_model, victim_model, device, + params['noise_dim'], params['num_nodes'], data.num_features, + generator_lr=params['generator_lr'], surrogate_lr=params['surrogate_lr'], + n_generator_steps=params['n_generator_steps'], n_surrogate_steps=params['n_surrogate_steps']) + elif attack_type == 2: + attack = TypeIIAttack(generator, surrogate_model, victim_model, device, + params['noise_dim'], params['num_nodes'], data.num_features, + generator_lr=params['generator_lr'], surrogate_lr=params['surrogate_lr'], + n_generator_steps=params['n_generator_steps'], n_surrogate_steps=params['n_surrogate_steps']) + elif attack_type == 3: + surrogate_model2 = SurrogateModel(data.num_features, params['hidden_dim'], dataset.num_classes).to(device) + attack = TypeIIIAttack(generator, surrogate_model, surrogate_model2, victim_model, device, + params['noise_dim'], params['num_nodes'], data.num_features, + generator_lr=params['generator_lr'], surrogate_lr=params['surrogate_lr'], + n_generator_steps=params['n_generator_steps'], n_surrogate_steps=params['n_surrogate_steps']) + else: + raise ValueError("Invalid attack type. 
Choose 1, 2, or 3.") + + trained_surrogate, _, _ = attack.attack(params['num_queries']) + accuracy, fidelity = evaluate_models(victim_model, trained_surrogate, data) + return accuracy, fidelity + +def main(): + parser = argparse.ArgumentParser(description="STEALGNN Interactive Example") + parser.add_argument("attack_type", type=int, choices=[1, 2, 3], help="Attack type (1, 2, or 3)") + parser.add_argument("dataset_name", type=str, choices=['cora', 'computers', 'pubmed', 'ogb-arxiv'], help="Dataset name") + parser.add_argument("--victim_model_path", type=str, help="Path to custom victim model file (optional)") + args = parser.parse_args() + + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + + # Load dataset and create/load victim model + dataset, data, victim_model = load_dataset_and_create_victim_model(args.dataset_name, device) + + if args.victim_model_path: + try: + victim_model.load_state_dict(torch.load(args.victim_model_path, map_location=device)) + print(f"Loaded custom victim model from {args.victim_model_path}") + except FileNotFoundError: + print(f"Error: Victim model file not found at {args.victim_model_path}") + print("Training a new victim model instead...") + train_victim_model(victim_model, data, args.dataset_name) + else: + print("Training victim model...") + train_victim_model(victim_model, data, args.dataset_name) + + # Set default parameters + params = { + 'noise_dim': 32, + 'num_nodes': 500, + 'hidden_dim': 16, + 'generator_type': 'cosine', + 'generator_lr': 1e-6, + 'surrogate_lr': 0.001, + 'n_generator_steps': 2, + 'n_surrogate_steps': 5, + 'num_queries': 100 + } + + # Allow user to tweak parameters + print("\nCurrent parameters:") + for key, value in params.items(): + print(f"{key}: {value}") + + change_params = input("\nDo you want to change any parameters? 
(y/n): ").lower() == 'y' + if change_params: + for key in params: + new_value = input(f"Enter new value for {key} (press Enter to keep current value): ") + if new_value: + params[key] = type(params[key])(new_value) + + print("\nRunning attack with the following parameters:") + for key, value in params.items(): + print(f"{key}: {value}") + + accuracy, fidelity = run_attack(args.attack_type, args.dataset_name, victim_model, data, dataset, device, params) + + print(f"\nResults for Type {args.attack_type} Attack on {args.dataset_name} dataset:") + print(f"Accuracy: {accuracy:.4f}") + print(f"Fidelity: {fidelity:.4f}") + +if __name__ == "__main__": + main() From 7b45659538fe08a08416eed28531cd3e0f227d44 Mon Sep 17 00:00:00 2001 From: Tyler Blalock <122387381+T-Breezy444@users.noreply.github.com> Date: Sun, 6 Oct 2024 20:55:14 -0400 Subject: [PATCH 05/24] Update and rename attacks.py to stealgnn.py Working correctly and true to the paper --- .../{attacks.py => stealgnn.py} | 138 ++++++++---------- 1 file changed, 64 insertions(+), 74 deletions(-) rename pygip/data_free_attack/{attacks.py => stealgnn.py} (70%) diff --git a/pygip/data_free_attack/attacks.py b/pygip/data_free_attack/stealgnn.py similarity index 70% rename from pygip/data_free_attack/attacks.py rename to pygip/data_free_attack/stealgnn.py index d3da233..3cbabf7 100644 --- a/pygip/data_free_attack/attacks.py +++ b/pygip/data_free_attack/stealgnn.py @@ -1,7 +1,11 @@ import torch import torch.nn as nn import torch.nn.functional as F +import torch.optim as optim from torch_geometric.nn import GCNConv +from torch_geometric.utils import to_dense_adj +import numpy as np +from tqdm import tqdm class GraphGenerator(nn.Module): def __init__(self, noise_dim, num_nodes, feature_dim, generator_type='cosine', threshold=0.1): @@ -11,7 +15,7 @@ def __init__(self, noise_dim, num_nodes, feature_dim, generator_type='cosine', t self.feature_dim = feature_dim self.generator_type = generator_type self.threshold = threshold - + 
self.feature_gen = nn.Sequential( nn.Linear(noise_dim, 128), nn.ReLU(), @@ -20,7 +24,7 @@ def __init__(self, noise_dim, num_nodes, feature_dim, generator_type='cosine', t nn.Linear(256, num_nodes * feature_dim), nn.Tanh() ) - + if generator_type == 'full_param': self.structure_gen = nn.Sequential( nn.Linear(noise_dim, 128), @@ -33,7 +37,7 @@ def __init__(self, noise_dim, num_nodes, feature_dim, generator_type='cosine', t def forward(self, z): features = self.feature_gen(z).view(self.num_nodes, self.feature_dim) - + if self.generator_type == 'cosine': adj = self.cosine_similarity_generator(features) elif self.generator_type == 'full_param': @@ -61,7 +65,27 @@ def full_param_generator(self, z): def adj_to_edge_index(self, adj): return adj.nonzero().t() -class BaseAttack: +class SurrogateModel(nn.Module): + def __init__(self, input_dim, hidden_dim, output_dim, num_layers=2): + super(SurrogateModel, self).__init__() + self.convs = nn.ModuleList() + self.convs.append(GCNConv(input_dim, hidden_dim)) + + for _ in range(num_layers - 2): + self.convs.append(GCNConv(hidden_dim, hidden_dim)) + + self.convs.append(GCNConv(hidden_dim, output_dim)) + + def forward(self, x, edge_index): + for i, conv in enumerate(self.convs[:-1]): + x = conv(x, edge_index) + x = F.relu(x) + x = F.dropout(x, p=0.5, training=self.training) + + x = self.convs[-1](x, edge_index) + return F.log_softmax(x, dim=1) + +class STEALGNN: def __init__(self, generator, surrogate_model, victim_model, device, noise_dim, num_nodes, feature_dim, generator_lr=1e-6, surrogate_lr=0.001, @@ -74,8 +98,8 @@ def __init__(self, generator, surrogate_model, victim_model, device, self.num_nodes = num_nodes self.feature_dim = feature_dim - self.generator_optimizer = torch.optim.Adam(self.generator.parameters(), lr=generator_lr) - self.surrogate_optimizer = torch.optim.Adam(self.surrogate_model.parameters(), lr=surrogate_lr) + self.generator_optimizer = optim.Adam(self.generator.parameters(), lr=generator_lr) + 
self.surrogate_optimizer = optim.Adam(self.surrogate_model.parameters(), lr=surrogate_lr) self.criterion = nn.CrossEntropyLoss() self.n_generator_steps = n_generator_steps @@ -87,9 +111,6 @@ def generate_graph(self): edge_index = self.generator.adj_to_edge_index(adj) return features, edge_index - def train_generator(self): - raise NotImplementedError - def train_surrogate(self): self.generator.eval() self.surrogate_model.train() @@ -107,7 +128,6 @@ def train_surrogate(self): loss = self.criterion(surrogate_output, victim_output.argmax(dim=1)) loss.backward() - torch.nn.utils.clip_grad_norm_(self.surrogate_model.parameters(), max_norm=1.0) self.surrogate_optimizer.step() total_loss += loss.item() @@ -118,16 +138,19 @@ def attack(self, num_queries): generator_losses = [] surrogate_losses = [] - for _ in range(num_queries): + pbar = tqdm(range(num_queries), desc=f"Running {self.__class__.__name__}") + for _ in pbar: gen_loss = self.train_generator() surr_loss = self.train_surrogate() generator_losses.append(gen_loss) surrogate_losses.append(surr_loss) + pbar.set_postfix({'Gen Loss': f"{gen_loss:.4f}", 'Surr Loss': f"{surr_loss:.4f}"}) + return self.surrogate_model, generator_losses, surrogate_losses -class TypeIAttack(BaseAttack): +class TypeIAttack(STEALGNN): def train_generator(self): self.generator.train() self.surrogate_model.eval() @@ -167,7 +190,7 @@ def train_generator(self): return total_loss / self.n_generator_steps -class TypeIIAttack(BaseAttack): +class TypeIIAttack(STEALGNN): def train_generator(self): self.generator.train() self.surrogate_model.eval() @@ -190,37 +213,21 @@ def train_generator(self): return total_loss / self.n_generator_steps -class TypeIIIAttack: +class TypeIIIAttack(STEALGNN): def __init__(self, generator, surrogate_model1, surrogate_model2, victim_model, device, noise_dim, num_nodes, feature_dim, generator_lr=1e-6, surrogate_lr=0.001, n_generator_steps=2, n_surrogate_steps=5): - self.generator = generator - self.surrogate_model1 = 
surrogate_model1 + super().__init__(generator, surrogate_model1, victim_model, device, + noise_dim, num_nodes, feature_dim, + generator_lr, surrogate_lr, + n_generator_steps, n_surrogate_steps) self.surrogate_model2 = surrogate_model2 - self.victim_model = victim_model - self.device = device - self.noise_dim = noise_dim - self.num_nodes = num_nodes - self.feature_dim = feature_dim - - self.generator_optimizer = torch.optim.Adam(self.generator.parameters(), lr=generator_lr) - self.surrogate_optimizer1 = torch.optim.Adam(self.surrogate_model1.parameters(), lr=surrogate_lr) - self.surrogate_optimizer2 = torch.optim.Adam(self.surrogate_model2.parameters(), lr=surrogate_lr) - - self.criterion = nn.CrossEntropyLoss() - self.n_generator_steps = n_generator_steps - self.n_surrogate_steps = n_surrogate_steps - - def generate_graph(self): - z = torch.randn(1, self.noise_dim).to(self.device) - features, adj = self.generator(z) - edge_index = self.generator.adj_to_edge_index(adj) - return features, edge_index + self.surrogate_optimizer2 = optim.Adam(self.surrogate_model2.parameters(), lr=surrogate_lr) def train_generator(self): self.generator.train() - self.surrogate_model1.eval() + self.surrogate_model.eval() self.surrogate_model2.eval() total_loss = 0 @@ -229,7 +236,7 @@ def train_generator(self): features, edge_index = self.generate_graph() - surrogate_output1 = self.surrogate_model1(features, edge_index) + surrogate_output1 = self.surrogate_model(features, edge_index) surrogate_output2 = self.surrogate_model2(features, edge_index) loss = -torch.mean(torch.std(torch.stack([surrogate_output1, surrogate_output2]), dim=0)) @@ -242,63 +249,46 @@ def train_generator(self): def train_surrogate(self): self.generator.eval() - self.surrogate_model1.train() + self.surrogate_model.train() self.surrogate_model2.train() total_loss = 0 for _ in range(self.n_surrogate_steps): - self.surrogate_optimizer1.zero_grad() + self.surrogate_optimizer.zero_grad() 
self.surrogate_optimizer2.zero_grad() features, edge_index = self.generate_graph() with torch.no_grad(): victim_output = self.victim_model(features, edge_index) - surrogate_output1 = self.surrogate_model1(features, edge_index) + surrogate_output1 = self.surrogate_model(features, edge_index) surrogate_output2 = self.surrogate_model2(features, edge_index) loss1 = self.criterion(surrogate_output1, victim_output.argmax(dim=1)) loss2 = self.criterion(surrogate_output2, victim_output.argmax(dim=1)) - loss1.backward() - loss2.backward() + combined_loss = loss1 + loss2 + combined_loss.backward() - torch.nn.utils.clip_grad_norm_(self.surrogate_model1.parameters(), max_norm=1.0) - torch.nn.utils.clip_grad_norm_(self.surrogate_model2.parameters(), max_norm=1.0) - - self.surrogate_optimizer1.step() + self.surrogate_optimizer.step() self.surrogate_optimizer2.step() - total_loss += (loss1.item() + loss2.item()) / 2 + total_loss += combined_loss.item() / 2 return total_loss / self.n_surrogate_steps - def attack(self, num_queries): - generator_losses = [] - surrogate_losses = [] - - for _ in range(num_queries): - gen_loss = self.train_generator() - surr_loss = self.train_surrogate() +def evaluate_models(victim_model, surrogate_model, data): + victim_model.eval() + surrogate_model.eval() + + with torch.no_grad(): + victim_out = victim_model(data.x, data.edge_index) + surrogate_out = surrogate_model(data.x, data.edge_index) + + victim_preds = victim_out.argmax(dim=1) + surrogate_preds = surrogate_out.argmax(dim=1) - generator_losses.append(gen_loss) - surrogate_losses.append(surr_loss) + accuracy = (surrogate_preds[data.test_mask] == data.y[data.test_mask]).float().mean().item() + fidelity = (surrogate_preds[data.test_mask] == victim_preds[data.test_mask]).float().mean().item() - return (self.surrogate_model1, self.surrogate_model2), generator_losses, surrogate_losses - -def run_attack(attack_type, generator, surrogate_model, victim_model, num_queries, device, - noise_dim, 
num_nodes, feature_dim): - if attack_type == 1: - attack = TypeIAttack(generator, surrogate_model, victim_model, device, - noise_dim, num_nodes, feature_dim) - elif attack_type == 2: - attack = TypeIIAttack(generator, surrogate_model, victim_model, device, - noise_dim, num_nodes, feature_dim) - elif attack_type == 3: - surrogate_model2 = type(surrogate_model)(surrogate_model.in_channels, surrogate_model.hidden_channels, surrogate_model.out_channels).to(device) - attack = TypeIIIAttack(generator, surrogate_model, surrogate_model2, victim_model, device, - noise_dim, num_nodes, feature_dim) - else: - raise ValueError("Invalid attack type. Choose 1, 2, or 3.") - - return attack.attack(num_queries) + return accuracy, fidelity From 53325398e14c113325a4f5bb8eb57d4d5a4e5559 Mon Sep 17 00:00:00 2001 From: Tyler Blalock <122387381+T-Breezy444@users.noreply.github.com> Date: Sun, 6 Oct 2024 21:00:45 -0400 Subject: [PATCH 06/24] Create README.md How to use the example and how to replicate the papers testing --- pygip/data_free_attack/README.md | 180 +++++++++++++++++++++++++++++++ 1 file changed, 180 insertions(+) create mode 100644 pygip/data_free_attack/README.md diff --git a/pygip/data_free_attack/README.md b/pygip/data_free_attack/README.md new file mode 100644 index 0000000..bfbed18 --- /dev/null +++ b/pygip/data_free_attack/README.md @@ -0,0 +1,180 @@ +# STEALGNN: Graph Neural Network Model Extraction + +This repository contains an implementation of STEALGNN, a framework for data-free model extraction attacks on Graph Neural Networks (GNNs). + +## Files + +1. `stealgnn.py`: Core implementation of the STEALGNN framework +2. 
`example.py`: Interactive script to run STEALGNN attacks + +## stealgnn.py + +This file contains the main implementation of the STEALGNN framework, including: + +- `GraphGenerator`: Generates synthetic graphs for the attack +- `SurrogateModel`: The model used to imitate the victim GNN +- `STEALGNN`: Base class for all attack types +- `TypeIAttack`, `TypeIIAttack`, `TypeIIIAttack`: Specific implementations of each attack type +- `evaluate_models`: Function to evaluate the performance of the attacks + +## example.py + +This interactive script demonstrates how to use the STEALGNN framework. It allows users to: + +1. Choose a dataset (Cora, Computers, PubMed, or OGB-Arxiv) +2. Select an attack type (1, 2, or 3) +3. Optionally load a pre-trained victim model +4. Customize attack parameters +5. Run the attack and view results + +### Usage + +Run the script using the following command: + +``` +python example.py <attack_type> <dataset> [--victim_model_path <path>] +``` + +For example: + +``` +python example.py 1 cora +python example.py 2 pubmed --victim_model_path /path/to/custom_model.pth +``` + +## Running Experiments from the Original STEALGNN Paper + +To exactly replicate the experiments from the original STEALGNN paper, use the following commands and parameters: + +### Ensuring Correct Victim Model Architecture + +Before running the experiments, modify the `load_dataset_and_create_victim_model` function in `example.py` to use the correct architecture for each dataset: + +```python +def load_dataset_and_create_victim_model(dataset_name, device): + # ...
(existing code for loading datasets) + + if dataset_name in ['cora', 'pubmed', 'computers']: + victim_model = GCN(input_dim, hidden_dim=64, output_dim=dataset.num_classes, num_layers=2).to(device) + elif dataset_name == 'ogb-arxiv': + victim_model = GCN(input_dim, hidden_dim=256, output_dim=dataset.num_classes, num_layers=3).to(device) + else: + raise ValueError("Invalid dataset name") + + return dataset, data, victim_model + +class GCN(torch.nn.Module): + def __init__(self, input_dim, hidden_dim, output_dim, num_layers): + super(GCN, self).__init__() + self.convs = torch.nn.ModuleList() + self.convs.append(GCNConv(input_dim, hidden_dim)) + for _ in range(num_layers - 2): + self.convs.append(GCNConv(hidden_dim, hidden_dim)) + self.convs.append(GCNConv(hidden_dim, output_dim)) + + def forward(self, x, edge_index): + for conv in self.convs[:-1]: + x = conv(x, edge_index) + x = F.relu(x) + x = F.dropout(x, p=0.5, training=self.training) + return self.convs[-1](x, edge_index) +``` + +### Running Experiments + +1. For Cora dataset: + ``` + python example.py 1 cora + python example.py 2 cora + python example.py 3 cora + ``` + Parameters to change: + - `noise_dim`: 32 + - `num_nodes`: 2485 (Cora's original node count) + - `hidden_dim`: 64 + - `generator_type`: 'cosine' + - `generator_lr`: 1e-6 + - `surrogate_lr`: 0.001 + - `n_generator_steps`: 2 + - `n_surrogate_steps`: 5 + - `num_queries`: 700 + +2. For PubMed dataset: + ``` + python example.py 1 pubmed + python example.py 2 pubmed + python example.py 3 pubmed + ``` + Parameters to change: + - `noise_dim`: 32 + - `num_nodes`: 19717 (PubMed's original node count) + - `hidden_dim`: 64 + - `generator_type`: 'cosine' + - `generator_lr`: 1e-6 + - `surrogate_lr`: 0.001 + - `n_generator_steps`: 2 + - `n_surrogate_steps`: 5 + - `num_queries`: 700 + +3. 
For Amazon Computers dataset: + ``` + python example.py 1 computers + python example.py 2 computers + python example.py 3 computers + ``` + Parameters to change: + - `noise_dim`: 32 + - `num_nodes`: 13381 (Amazon Computers' original node count) + - `hidden_dim`: 64 + - `generator_type`: 'cosine' + - `generator_lr`: 1e-6 + - `surrogate_lr`: 0.001 + - `n_generator_steps`: 2 + - `n_surrogate_steps`: 5 + - `num_queries`: 700 + +4. For OGB-Arxiv dataset: + ``` + python example.py 1 ogb-arxiv + python example.py 2 ogb-arxiv + python example.py 3 ogb-arxiv + ``` + Parameters to change: + - `noise_dim`: 32 + - `num_nodes`: 169343 (OGB-Arxiv's original node count) + - `hidden_dim`: 128 + - `generator_type`: 'cosine' + - `generator_lr`: 1e-6 + - `surrogate_lr`: 0.001 + - `n_generator_steps`: 2 + - `n_surrogate_steps`: 5 + - `num_queries`: 700 + +For each dataset, run all three attack types (1, 2, and 3) to compare their performance. + +When prompted to change parameters in the interactive script, make sure to input the values listed above for each dataset to exactly replicate the paper's experiments. + +## Additional Notes + +- The experiments use PyTorch Geometric for graph operations and model implementations. +- Adam optimizer is used for both the generator and surrogate model training. +- Run each experiment multiple times (5 times in the original paper) to account for randomness, and average the results for a more robust comparison. +- Ensure that your Python environment has all necessary dependencies installed, including PyTorch, PyTorch Geometric, and OGB (for the OGB-Arxiv dataset). + +## Customizing Experiments + +When running `example.py`, you'll be prompted to modify the default parameters. 
You can experiment with different values for: + +- `noise_dim`: Dimension of the noise vector for graph generation +- `num_nodes`: Number of nodes in the generated graphs +- `hidden_dim`: Hidden dimension size for the surrogate model +- `generator_type`: Type of graph generator ('cosine' or 'full_param') +- `generator_lr`: Learning rate for the generator +- `surrogate_lr`: Learning rate for the surrogate model +- `n_generator_steps`: Number of generator training steps per iteration +- `n_surrogate_steps`: Number of surrogate model training steps per iteration +- `num_queries`: Total number of queries to the victim model + +Adjust these parameters to explore their impact on the attack performance. + +By following this README, you should be able to replicate the experiments from the original STEALGNN paper and conduct your own experiments using this implementation. From f01cd4ab2f70183316a790fde8982e173102634c Mon Sep 17 00:00:00 2001 From: Tyler Blalock <122387381+T-Breezy444@users.noreply.github.com> Date: Fri, 11 Oct 2024 11:36:17 -0400 Subject: [PATCH 07/24] Update requirements.txt --- requirements.txt | 68 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 23 deletions(-) diff --git a/requirements.txt b/requirements.txt index 93cb5aa..8d8936d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,45 +1,67 @@ +# System packages (commented out as they're typically managed by the system) # bzip2==1.0.8 # ca-certificates==2024.7.2 # libffi==3.4.4 # ncurses==6.4 # openssl==3.0.14 -# pip==24.0 -# python==3.10.14 # readline==8.2 -# setuptools==69.5.1 # sqlite==3.45.3 # tk==8.6.14 -# wheel==0.43.0 # xz==5.4.6 # zlib==1.2.13 + +# Python and pip +python==3.10.14 +pip==24.0 +setuptools==69.5.1 +wheel==0.43.0 + +# Core libraries torch==2.3.0 +numpy==2.0.1 +pandas==2.2.2 +scipy==1.14.0 + +# Graph-related libraries +torch-geometric==2.5.0 +networkx==3.3 +ogb==1.3.6 +dgl==2.2.1 + +# Utility libraries +tqdm==4.66.4 +pyyaml==6.0.1 
+requests==2.32.3 +fsspec==2024.6.1 +psutil==6.0.0 + +# Data handling and processing +torchdata==0.7.1 +python-dateutil==2.9.0.post0 +pytz==2024.1 +tzdata==2024.1 + +# Type checking and extensions +typing-extensions==4.12.2 annotated-types==0.7.0 +pydantic==2.8.2 +pydantic-core==2.20.1 + +# Other dependencies certifi==2024.7.4 charset-normalizer==3.3.2 filelock==3.15.4 -fsspec==2024.6.1 idna==3.7 jinja2==3.1.4 markupsafe==2.1.5 mpmath==1.3.0 -networkx==3.3 -numpy==2.0.1 -pandas==2.2.2 -psutil==6.0.0 -pydantic==2.8.2 -pydantic-core==2.20.1 -python-dateutil==2.9.0.post0 -pytz==2024.1 -pyyaml==6.0.1 -requests==2.32.3 -scipy==1.14.0 six==1.16.0 sympy==1.13.1 -torchdata==0.7.1 -tqdm==4.66.4 -typing-extensions==4.12.2 -tzdata==2024.1 urllib3==2.2.2 -dgl==2.2.1 -torch_geometric -packaging + +# Additional useful libraries for ML projects +scikit-learn==1.3.0 +matplotlib==3.7.5 + +# Packaging +packaging==23.2 From cd4fc21601eeef0571b96022e8aac162ca04ea49 Mon Sep 17 00:00:00 2001 From: Tyler Blalock <122387381+T-Breezy444@users.noreply.github.com> Date: Sat, 12 Oct 2024 16:52:09 -0400 Subject: [PATCH 08/24] Create generator.py --- pygip/data_free_attack/models/generator.py | 106 +++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 pygip/data_free_attack/models/generator.py diff --git a/pygip/data_free_attack/models/generator.py b/pygip/data_free_attack/models/generator.py new file mode 100644 index 0000000..ae8637e --- /dev/null +++ b/pygip/data_free_attack/models/generator.py @@ -0,0 +1,106 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch_geometric.nn import GCNConv + +class GraphGenerator(nn.Module): + def __init__(self, noise_dim, num_nodes, feature_dim, generator_type='cosine', threshold=0.1): + super(GraphGenerator, self).__init__() + self.noise_dim = noise_dim + self.num_nodes = num_nodes + self.feature_dim = feature_dim + self.generator_type = generator_type + self.threshold = threshold + + # Feature generator 
+ self.feature_gen = nn.Sequential( + nn.Linear(noise_dim, 128), + nn.ReLU(), + nn.Linear(128, 256), + nn.ReLU(), + nn.Linear(256, num_nodes * feature_dim), + nn.Tanh() + ) + + # Full parameterization structure generator + if generator_type == 'full_param': + self.structure_gen = nn.Sequential( + nn.Linear(noise_dim, 128), + nn.ReLU(), + nn.Linear(128, 256), + nn.ReLU(), + nn.Linear(256, num_nodes * num_nodes), + nn.Sigmoid() + ) + + def forward(self, z): + # Generate features + features = self.feature_gen(z).view(self.num_nodes, self.feature_dim) + + # Generate adjacency matrix + if self.generator_type == 'cosine': + adj = self.cosine_similarity_generator(features) + elif self.generator_type == 'full_param': + adj = self.full_param_generator(z) + else: + raise ValueError("Invalid generator type. Choose 'cosine' or 'full_param'.") + + # Normalize adjacency matrix + adj = adj / adj.sum(1, keepdim=True).clamp(min=1) + + return features, adj + + def cosine_similarity_generator(self, features): + # Compute cosine similarity + norm_features = F.normalize(features, p=2, dim=1) + adj = torch.mm(norm_features, norm_features.t()) + + # Apply threshold + adj = (adj > self.threshold).float() + + # Remove self-loops + adj = adj * (1 - torch.eye(self.num_nodes, device=adj.device)) + + return adj + + def full_param_generator(self, z): + adj = self.structure_gen(z).view(self.num_nodes, self.num_nodes) + + # Make symmetric + adj = (adj + adj.t()) / 2 + + # Remove self-loops + adj = adj * (1 - torch.eye(self.num_nodes, device=adj.device)) + + return adj + + def adj_to_edge_index(self, adj): + return adj.nonzero().t() + + def self_supervised_training(self, x, adj, model): + # Implement self-supervised denoising task + self.train() + + # Add noise to features + noise = torch.randn_like(x) * 0.1 + noisy_x = x + noise + + # Use the model to denoise + edge_index = self.adj_to_edge_index(adj) + denoised_x = model(noisy_x, edge_index) + + # Compute reconstruction loss + loss = 
F.mse_loss(denoised_x, x) + + return loss + +class DenoisingModel(nn.Module): + def __init__(self, input_dim, hidden_dim): + super(DenoisingModel, self).__init__() + self.conv1 = GCNConv(input_dim, hidden_dim) + self.conv2 = GCNConv(hidden_dim, input_dim) + + def forward(self, x, edge_index): + x = F.relu(self.conv1(x, edge_index)) + x = self.conv2(x, edge_index) + return x From 4fa1ed58cab114e8d5b6dcbcabdf3586f2e82605 Mon Sep 17 00:00:00 2001 From: Tyler Blalock <122387381+T-Breezy444@users.noreply.github.com> Date: Sat, 12 Oct 2024 16:52:39 -0400 Subject: [PATCH 09/24] Create surrogate.py --- pygip/data_free_attack/models/surrogate.py | 43 ++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 pygip/data_free_attack/models/surrogate.py diff --git a/pygip/data_free_attack/models/surrogate.py b/pygip/data_free_attack/models/surrogate.py new file mode 100644 index 0000000..7d7e4c1 --- /dev/null +++ b/pygip/data_free_attack/models/surrogate.py @@ -0,0 +1,43 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch_geometric.nn import GCNConv + +class SurrogateModel(nn.Module): + def __init__(self, input_dim, hidden_dim, output_dim, num_layers=2, dropout_rate=0.5): + super(SurrogateModel, self).__init__() + self.convs = nn.ModuleList() + self.convs.append(GCNConv(input_dim, hidden_dim)) + + for _ in range(num_layers - 2): + self.convs.append(GCNConv(hidden_dim, hidden_dim)) + + self.convs.append(GCNConv(hidden_dim, output_dim)) + self.dropout_rate = dropout_rate + + def forward(self, x, edge_index): + for i, conv in enumerate(self.convs[:-1]): + x = conv(x, edge_index) + x = F.relu(x) + x = F.dropout(x, p=self.dropout_rate, training=self.training) + + x = self.convs[-1](x, edge_index) + return F.softmax(x, dim=1) + + def train_step(self, generator, victim_model, optimizer, criterion, device): + self.train() + optimizer.zero_grad() + + z = torch.randn(1, generator.noise_dim).to(device) + features, adj = generator(z) + 
edge_index = generator.adj_to_edge_index(adj) + + with torch.no_grad(): + victim_output = victim_model(features, edge_index) + surrogate_output = self(features, edge_index) + + loss = criterion(surrogate_output, victim_output.argmax(dim=1)) + loss.backward() + optimizer.step() + + return loss.item() From 9e240ad07119028ed6638b48e852859889df7e4e Mon Sep 17 00:00:00 2001 From: Tyler Blalock <122387381+T-Breezy444@users.noreply.github.com> Date: Sat, 12 Oct 2024 16:53:00 -0400 Subject: [PATCH 10/24] Create victim.py --- pygip/data_free_attack/models/victim.py | 49 +++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 pygip/data_free_attack/models/victim.py diff --git a/pygip/data_free_attack/models/victim.py b/pygip/data_free_attack/models/victim.py new file mode 100644 index 0000000..bc2aa7b --- /dev/null +++ b/pygip/data_free_attack/models/victim.py @@ -0,0 +1,49 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch_geometric.nn import GCNConv + +class VictimModel(nn.Module): + def __init__(self, input_dim, hidden_dim, output_dim, num_layers=2): + super(VictimModel, self).__init__() + self.convs = nn.ModuleList() + self.convs.append(GCNConv(input_dim, hidden_dim)) + + for _ in range(num_layers - 2): + self.convs.append(GCNConv(hidden_dim, hidden_dim)) + + self.convs.append(GCNConv(hidden_dim, output_dim)) + + def forward(self, x, edge_index): + for i, conv in enumerate(self.convs[:-1]): + x = conv(x, edge_index) + x = F.relu(x) + x = F.dropout(x, p=0.25, training=self.training) # Paper: p=0.5 + + x = self.convs[-1](x, edge_index) + return F.log_softmax(x, dim=1) + +def create_victim_model_cora(): + input_dim = 1433 + hidden_dim = 64 # Paper: 128 + output_dim = 7 + return VictimModel(input_dim, hidden_dim, output_dim) + +def create_victim_model_computers(): + input_dim = 767 + hidden_dim = 64 # Paper: 128 + output_dim = 10 + return VictimModel(input_dim, hidden_dim, output_dim) + +def 
create_victim_model_pubmed(): + input_dim = 500 + hidden_dim = 64 # Paper: 128 + output_dim = 3 + return VictimModel(input_dim, hidden_dim, output_dim) + +def create_victim_model_ogb_arxiv(): + input_dim = 128 + hidden_dim = 128 # Paper: 256 + output_dim = 40 + num_layers = 2 # Paper: 3 + return VictimModel(input_dim, hidden_dim, output_dim, num_layers) From 14dcf6b9e0db2c47809ea53d922ec2eb102e2b7b Mon Sep 17 00:00:00 2001 From: Tyler Blalock <122387381+T-Breezy444@users.noreply.github.com> Date: Sat, 12 Oct 2024 16:54:54 -0400 Subject: [PATCH 11/24] Create attack1.py --- pygip/data_free_attack/attacks/attack1.py | 120 ++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 pygip/data_free_attack/attacks/attack1.py diff --git a/pygip/data_free_attack/attacks/attack1.py b/pygip/data_free_attack/attacks/attack1.py new file mode 100644 index 0000000..3f81619 --- /dev/null +++ b/pygip/data_free_attack/attacks/attack1.py @@ -0,0 +1,120 @@ +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm + +class TypeIAttack: + def __init__(self, generator, surrogate_model, victim_model, device, + noise_dim, num_nodes, feature_dim, + generator_lr=1e-6, surrogate_lr=0.001, + n_generator_steps=2, n_surrogate_steps=5): + self.generator = generator + self.surrogate_model = surrogate_model + self.victim_model = victim_model + self.device = device + self.noise_dim = noise_dim + self.num_nodes = num_nodes + self.feature_dim = feature_dim + + self.generator_optimizer = optim.Adam(self.generator.parameters(), lr=generator_lr) + self.surrogate_optimizer = optim.Adam(self.surrogate_model.parameters(), lr=surrogate_lr) + + self.criterion = nn.CrossEntropyLoss() + self.n_generator_steps = n_generator_steps + self.n_surrogate_steps = n_surrogate_steps + + def generate_graph(self): + z = torch.randn(1, self.noise_dim).to(self.device) + features, adj = self.generator(z) + edge_index = self.generator.adj_to_edge_index(adj) + return features, 
edge_index + + def train_generator(self): + self.generator.train() + self.surrogate_model.eval() + + total_loss = 0 + for _ in range(self.n_generator_steps): + self.generator_optimizer.zero_grad() + + features, edge_index = self.generate_graph() + + with torch.no_grad(): + victim_output = self.victim_model(features, edge_index) + surrogate_output = self.surrogate_model(features, edge_index) + + loss = -self.criterion(surrogate_output, victim_output.argmax(dim=1)) + + # Zeroth-order optimization with multiple random directions + epsilon = 1e-6 + num_directions = 2 + estimated_gradient = torch.zeros_like(features) + + for _ in range(num_directions): + u = torch.randn_like(features) + perturbed_features = features + epsilon * u + + with torch.no_grad(): + perturbed_victim_output = self.victim_model(perturbed_features, edge_index) + perturbed_surrogate_output = self.surrogate_model(perturbed_features, edge_index) + perturbed_loss = -self.criterion(perturbed_surrogate_output, perturbed_victim_output.argmax(dim=1)) + + estimated_gradient += (perturbed_loss - loss) / epsilon * u + + estimated_gradient /= num_directions + features.grad = estimated_gradient + + self.generator_optimizer.step() + total_loss += loss.item() + + return total_loss / self.n_generator_steps + + def train_surrogate(self): + self.generator.eval() + self.surrogate_model.train() + + total_loss = 0 + for _ in range(self.n_surrogate_steps): + self.surrogate_optimizer.zero_grad() + + features, edge_index = self.generate_graph() + + with torch.no_grad(): + victim_output = self.victim_model(features, edge_index) + surrogate_output = self.surrogate_model(features, edge_index) + + loss = self.criterion(surrogate_output, victim_output.argmax(dim=1)) + + loss.backward() + torch.nn.utils.clip_grad_norm_(self.surrogate_model.parameters(), max_norm=1.0) + self.surrogate_optimizer.step() + + total_loss += loss.item() + + return total_loss / self.n_surrogate_steps + + def attack(self, num_queries, log_interval=10): 
+ generator_losses = [] + surrogate_losses = [] + + pbar = tqdm(range(num_queries), desc="Attacking") + for query in pbar: + gen_loss = self.train_generator() + surr_loss = self.train_surrogate() + + generator_losses.append(gen_loss) + surrogate_losses.append(surr_loss) + + if (query + 1) % log_interval == 0: + pbar.set_postfix({ + 'Gen Loss': f"{gen_loss:.4f}", + 'Surr Loss': f"{surr_loss:.4f}" + }) + + return self.surrogate_model, generator_losses, surrogate_losses + +def run_attack(generator, surrogate_model, victim_model, num_queries, device, + noise_dim, num_nodes, feature_dim): + attack = TypeIAttack(generator, surrogate_model, victim_model, device, + noise_dim, num_nodes, feature_dim) + return attack.attack(num_queries) From 81c0e84ae4c0c1685bde4dcfdeab2fc0182e3f4c Mon Sep 17 00:00:00 2001 From: Tyler Blalock <122387381+T-Breezy444@users.noreply.github.com> Date: Sat, 12 Oct 2024 16:55:26 -0400 Subject: [PATCH 12/24] Create attack2.py --- pygip/data_free_attack/attacks/attack2.py | 103 ++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 pygip/data_free_attack/attacks/attack2.py diff --git a/pygip/data_free_attack/attacks/attack2.py b/pygip/data_free_attack/attacks/attack2.py new file mode 100644 index 0000000..8ec06f4 --- /dev/null +++ b/pygip/data_free_attack/attacks/attack2.py @@ -0,0 +1,103 @@ +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm + +class TypeIIAttack: + def __init__(self, generator, surrogate_model, victim_model, device, + noise_dim, num_nodes, feature_dim, + generator_lr=1e-6, surrogate_lr=0.001, + n_generator_steps=2, n_surrogate_steps=5): + self.generator = generator + self.surrogate_model = surrogate_model + self.victim_model = victim_model + self.device = device + self.noise_dim = noise_dim + self.num_nodes = num_nodes + self.feature_dim = feature_dim + + self.generator_optimizer = optim.Adam(self.generator.parameters(), lr=generator_lr) + self.surrogate_optimizer = 
optim.Adam(self.surrogate_model.parameters(), lr=surrogate_lr) + + self.criterion = nn.CrossEntropyLoss() + self.n_generator_steps = n_generator_steps + self.n_surrogate_steps = n_surrogate_steps + + def generate_graph(self): + z = torch.randn(1, self.noise_dim).to(self.device) + features, adj = self.generator(z) + edge_index = self.generator.adj_to_edge_index(adj) + return features, edge_index + + def train_generator(self): + self.generator.train() + self.surrogate_model.eval() + + total_loss = 0 + for _ in range(self.n_generator_steps): + self.generator_optimizer.zero_grad() + + features, edge_index = self.generate_graph() + + with torch.no_grad(): + victim_output = self.victim_model(features, edge_index) + surrogate_output = self.surrogate_model(features, edge_index) + + # In Type II, we use the surrogate model's gradient directly + loss = -self.criterion(surrogate_output, victim_output.argmax(dim=1)) + loss.backward() + + self.generator_optimizer.step() + total_loss += loss.item() + + return total_loss / self.n_generator_steps + + def train_surrogate(self): + self.generator.eval() + self.surrogate_model.train() + + total_loss = 0 + for _ in range(self.n_surrogate_steps): + self.surrogate_optimizer.zero_grad() + + features, edge_index = self.generate_graph() + + with torch.no_grad(): + victim_output = self.victim_model(features, edge_index) + surrogate_output = self.surrogate_model(features, edge_index) + + loss = self.criterion(surrogate_output, victim_output.argmax(dim=1)) + + loss.backward() + torch.nn.utils.clip_grad_norm_(self.surrogate_model.parameters(), max_norm=1.0) + self.surrogate_optimizer.step() + + total_loss += loss.item() + + return total_loss / self.n_surrogate_steps + + def attack(self, num_queries, log_interval=10): + generator_losses = [] + surrogate_losses = [] + + pbar = tqdm(range(num_queries), desc="Attacking") + for query in pbar: + gen_loss = self.train_generator() + surr_loss = self.train_surrogate() + + 
generator_losses.append(gen_loss) + surrogate_losses.append(surr_loss) + + if (query + 1) % log_interval == 0: + pbar.set_postfix({ + 'Gen Loss': f"{gen_loss:.4f}", + 'Surr Loss': f"{surr_loss:.4f}" + }) + + return self.surrogate_model, generator_losses, surrogate_losses + +def run_attack(generator, surrogate_model, victim_model, num_queries, device, + noise_dim, num_nodes, feature_dim): + attack = TypeIIAttack(generator, surrogate_model, victim_model, device, + noise_dim, num_nodes, feature_dim) + return attack.attack(num_queries) From ffbe78e0442dc61ab91f5e6164666197a085d736 Mon Sep 17 00:00:00 2001 From: Tyler Blalock <122387381+T-Breezy444@users.noreply.github.com> Date: Sat, 12 Oct 2024 16:55:47 -0400 Subject: [PATCH 13/24] Create attack3.py --- pygip/data_free_attack/attacks/attack3.py | 115 ++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 pygip/data_free_attack/attacks/attack3.py diff --git a/pygip/data_free_attack/attacks/attack3.py b/pygip/data_free_attack/attacks/attack3.py new file mode 100644 index 0000000..e57692d --- /dev/null +++ b/pygip/data_free_attack/attacks/attack3.py @@ -0,0 +1,115 @@ +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm + +class TypeIIIAttack: + def __init__(self, generator, surrogate_model1, surrogate_model2, victim_model, device, + noise_dim, num_nodes, feature_dim, + generator_lr=1e-6, surrogate_lr=0.001, + n_generator_steps=2, n_surrogate_steps=5): + self.generator = generator + self.surrogate_model1 = surrogate_model1 + self.surrogate_model2 = surrogate_model2 + self.victim_model = victim_model + self.device = device + self.noise_dim = noise_dim + self.num_nodes = num_nodes + self.feature_dim = feature_dim + + self.generator_optimizer = optim.Adam(self.generator.parameters(), lr=generator_lr) + self.surrogate_optimizer1 = optim.Adam(self.surrogate_model1.parameters(), lr=surrogate_lr) + self.surrogate_optimizer2 = optim.Adam(self.surrogate_model2.parameters(), 
lr=surrogate_lr) + + self.criterion = nn.CrossEntropyLoss() + self.n_generator_steps = n_generator_steps + self.n_surrogate_steps = n_surrogate_steps + + def generate_graph(self): + z = torch.randn(1, self.noise_dim).to(self.device) + features, adj = self.generator(z) + edge_index = self.generator.adj_to_edge_index(adj) + return features, edge_index + + def train_generator(self): + self.generator.train() + self.surrogate_model1.eval() + self.surrogate_model2.eval() + + total_loss = 0 + for _ in range(self.n_generator_steps): + self.generator_optimizer.zero_grad() + + features, edge_index = self.generate_graph() + + surrogate_output1 = self.surrogate_model1(features, edge_index) + surrogate_output2 = self.surrogate_model2(features, edge_index) + + # Compute disagreement loss + loss = -torch.mean(torch.std(torch.stack([surrogate_output1, surrogate_output2]), dim=0)) + loss.backward() + + self.generator_optimizer.step() + total_loss += loss.item() + + return total_loss / self.n_generator_steps + + def train_surrogate(self): + self.generator.eval() + self.surrogate_model1.train() + self.surrogate_model2.train() + + total_loss = 0 + for _ in range(self.n_surrogate_steps): + self.surrogate_optimizer1.zero_grad() + self.surrogate_optimizer2.zero_grad() + + features, edge_index = self.generate_graph() + + with torch.no_grad(): + victim_output = self.victim_model(features, edge_index) + surrogate_output1 = self.surrogate_model1(features, edge_index) + surrogate_output2 = self.surrogate_model2(features, edge_index) + + loss1 = self.criterion(surrogate_output1, victim_output.argmax(dim=1)) + loss2 = self.criterion(surrogate_output2, victim_output.argmax(dim=1)) + + # Combine losses and backpropagate once + combined_loss = loss1 + loss2 + combined_loss.backward() + + torch.nn.utils.clip_grad_norm_(self.surrogate_model1.parameters(), max_norm=1.0) + torch.nn.utils.clip_grad_norm_(self.surrogate_model2.parameters(), max_norm=1.0) + + self.surrogate_optimizer1.step() + 
self.surrogate_optimizer2.step() + + total_loss += combined_loss.item() / 2 + + return total_loss / self.n_surrogate_steps + + def attack(self, num_queries, log_interval=10): + generator_losses = [] + surrogate_losses = [] + + pbar = tqdm(range(num_queries), desc="Attacking") + for query in pbar: + gen_loss = self.train_generator() + surr_loss = self.train_surrogate() + + generator_losses.append(gen_loss) + surrogate_losses.append(surr_loss) + + if (query + 1) % log_interval == 0: + pbar.set_postfix({ + 'Gen Loss': f"{gen_loss:.4f}", + 'Surr Loss': f"{surr_loss:.4f}" + }) + + return (self.surrogate_model1, self.surrogate_model2), generator_losses, surrogate_losses + +def run_attack(generator, surrogate_model1, surrogate_model2, victim_model, num_queries, device, + noise_dim, num_nodes, feature_dim): + attack = TypeIIIAttack(generator, surrogate_model1, surrogate_model2, victim_model, device, + noise_dim, num_nodes, feature_dim) + return attack.attack(num_queries) From c121b3ba399913fb14a333f1c42ce56664b975fa Mon Sep 17 00:00:00 2001 From: Tyler Blalock <122387381+T-Breezy444@users.noreply.github.com> Date: Sat, 12 Oct 2024 17:24:43 -0400 Subject: [PATCH 14/24] Update example.py --- pygip/data_free_attack/example.py | 210 ++++++++++-------------------- 1 file changed, 70 insertions(+), 140 deletions(-) diff --git a/pygip/data_free_attack/example.py b/pygip/data_free_attack/example.py index 10ef84e..9b2cc77 100644 --- a/pygip/data_free_attack/example.py +++ b/pygip/data_free_attack/example.py @@ -1,168 +1,98 @@ -import sys -import argparse import torch import torch.nn as nn import torch.optim as optim -from torch_geometric.datasets import Planetoid, Amazon -from ogb.nodeproppred import PygNodePropPredDataset +from torch_geometric.datasets import Planetoid from torch_geometric.transforms import NormalizeFeatures -from torch_geometric.utils import to_undirected -import numpy as np -from stealgnn import GraphGenerator, SurrogateModel, TypeIAttack, TypeIIAttack, 
TypeIIIAttack, evaluate_models +from models.generator import GraphGenerator +from models.victim import create_victim_model_cora +from attacks.attack1 import TypeIAttack +from attacks.attack2 import TypeIIAttack +from attacks.attack3 import TypeIIIAttack -def create_masks(num_nodes, train_ratio=0.6, val_ratio=0.2): - indices = np.random.permutation(num_nodes) - train_size = int(num_nodes * train_ratio) - val_size = int(num_nodes * val_ratio) - - train_mask = torch.zeros(num_nodes, dtype=torch.bool) - val_mask = torch.zeros(num_nodes, dtype=torch.bool) - test_mask = torch.zeros(num_nodes, dtype=torch.bool) - - train_mask[indices[:train_size]] = True - val_mask[indices[train_size:train_size+val_size]] = True - test_mask[indices[train_size+val_size:]] = True - - return train_mask, val_mask, test_mask - -def load_dataset_and_create_victim_model(dataset_name, device): - if dataset_name == 'cora': - dataset = Planetoid(root='/tmp/Cora', name='Cora', transform=NormalizeFeatures()) - data = dataset[0].to(device) - elif dataset_name == 'computers': - dataset = Amazon(root='/tmp/Amazon', name='Computers', transform=NormalizeFeatures()) - data = dataset[0].to(device) - data.edge_index = to_undirected(data.edge_index) - train_mask, val_mask, test_mask = create_masks(data.num_nodes) - data.train_mask, data.val_mask, data.test_mask = train_mask.to(device), val_mask.to(device), test_mask.to(device) - elif dataset_name == 'pubmed': - dataset = Planetoid(root='/tmp/Pubmed', name='Pubmed', transform=NormalizeFeatures()) - data = dataset[0].to(device) - elif dataset_name == 'ogb-arxiv': - dataset = PygNodePropPredDataset(name='ogbn-arxiv', transform=NormalizeFeatures()) - data = dataset[0].to(device) - split_idx = dataset.get_idx_split() - data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool) - data.val_mask = torch.zeros(data.num_nodes, dtype=torch.bool) - data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool) - data.train_mask[split_idx['train']] = True - 
data.val_mask[split_idx['valid']] = True - data.test_mask[split_idx['test']] = True - data.train_mask, data.val_mask, data.test_mask = data.train_mask.to(device), data.val_mask.to(device), data.test_mask.to(device) - else: - raise ValueError("Invalid dataset name. Choose 'cora', 'computers', 'pubmed', or 'ogb-arxiv'.") - - input_dim, hidden_dim, output_dim = data.num_features, 16, dataset.num_classes - victim_model = SurrogateModel(input_dim, hidden_dim, output_dim).to(device) - return dataset, data, victim_model - -def train_victim_model(victim_model, data, dataset_name, epochs=200, lr=0.01, weight_decay=5e-4): - optimizer = optim.Adam(victim_model.parameters(), lr=lr, weight_decay=weight_decay) - criterion = nn.NLLLoss() - +def train_victim_model(model, data, epochs=200, lr=0.01, weight_decay=5e-4): + optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay) + model.train() for epoch in range(epochs): - victim_model.train() optimizer.zero_grad() - out = victim_model(data.x, data.edge_index) - loss = criterion(out[data.train_mask], data.y[data.train_mask]) + out = model(data.x, data.edge_index) + loss = nn.functional.nll_loss(out[data.train_mask], data.y[data.train_mask]) loss.backward() optimizer.step() if (epoch + 1) % 10 == 0: - victim_model.eval() + model.eval() with torch.no_grad(): - val_out = victim_model(data.x, data.edge_index) - val_loss = criterion(val_out[data.val_mask], data.y[data.val_mask]) + val_out = model(data.x, data.edge_index) + val_loss = nn.functional.nll_loss(val_out[data.val_mask], data.y[data.val_mask]) val_acc = (val_out[data.val_mask].argmax(dim=1) == data.y[data.val_mask]).float().mean() + model.train() print(f'Epoch {epoch+1}/{epochs}, Train Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}, Val Acc: {val_acc.item():.4f}') -def run_attack(attack_type, dataset_name, victim_model, data, dataset, device, params): - generator = GraphGenerator(params['noise_dim'], params['num_nodes'], data.num_features, - 
generator_type=params['generator_type']).to(device) - surrogate_model = SurrogateModel(data.num_features, params['hidden_dim'], dataset.num_classes).to(device) +def evaluate_model(model, data): + model.eval() + with torch.no_grad(): + out = model(data.x, data.edge_index) + pred = out.argmax(dim=1) + correct = pred[data.test_mask] == data.y[data.test_mask] + accuracy = int(correct.sum()) / int(data.test_mask.sum()) + return accuracy - if attack_type == 1: - attack = TypeIAttack(generator, surrogate_model, victim_model, device, - params['noise_dim'], params['num_nodes'], data.num_features, - generator_lr=params['generator_lr'], surrogate_lr=params['surrogate_lr'], - n_generator_steps=params['n_generator_steps'], n_surrogate_steps=params['n_surrogate_steps']) - elif attack_type == 2: - attack = TypeIIAttack(generator, surrogate_model, victim_model, device, - params['noise_dim'], params['num_nodes'], data.num_features, - generator_lr=params['generator_lr'], surrogate_lr=params['surrogate_lr'], - n_generator_steps=params['n_generator_steps'], n_surrogate_steps=params['n_surrogate_steps']) - elif attack_type == 3: - surrogate_model2 = SurrogateModel(data.num_features, params['hidden_dim'], dataset.num_classes).to(device) - attack = TypeIIIAttack(generator, surrogate_model, surrogate_model2, victim_model, device, - params['noise_dim'], params['num_nodes'], data.num_features, - generator_lr=params['generator_lr'], surrogate_lr=params['surrogate_lr'], - n_generator_steps=params['n_generator_steps'], n_surrogate_steps=params['n_surrogate_steps']) - else: - raise ValueError("Invalid attack type. 
Choose 1, 2, or 3.") +def run_attacks(victim_model, data, device): + # Initialize generator and surrogate model + noise_dim = 32 + num_nodes = 500 + feature_dim = data.num_features + output_dim = data.y.max().item() + 1 # Calculate number of classes - trained_surrogate, _, _ = attack.attack(params['num_queries']) - accuracy, fidelity = evaluate_models(victim_model, trained_surrogate, data) - return accuracy, fidelity + generator = GraphGenerator(noise_dim, num_nodes, feature_dim, generator_type='cosine').to(device) + surrogate_model = create_victim_model_cora().to(device) -def main(): - parser = argparse.ArgumentParser(description="STEALGNN Interactive Example") - parser.add_argument("attack_type", type=int, choices=[1, 2, 3], help="Attack type (1, 2, or 3)") - parser.add_argument("dataset_name", type=str, choices=['cora', 'computers', 'pubmed', 'ogb-arxiv'], help="Dataset name") - parser.add_argument("--victim_model_path", type=str, help="Path to custom victim model file (optional)") - args = parser.parse_args() + # Attack parameters + num_queries = 300 + generator_lr = 1e-6 + surrogate_lr = 0.001 + n_generator_steps = 2 + n_surrogate_steps = 5 + # Run attacks + attacks = [ + ("Type I", TypeIAttack(generator, surrogate_model, victim_model, device, + noise_dim, num_nodes, feature_dim, generator_lr, surrogate_lr, + n_generator_steps, n_surrogate_steps))] + ''' + ("Type II", TypeIIAttack(generator, surrogate_model, victim_model, device, + noise_dim, num_nodes, feature_dim, generator_lr, surrogate_lr, + n_generator_steps, n_surrogate_steps)), + ("Type III", TypeIIIAttack(generator, surrogate_model, create_victim_model_cora().to(device), + victim_model, device, noise_dim, num_nodes, feature_dim, generator_lr, surrogate_lr, + n_generator_steps, n_surrogate_steps)) + ''' + + for attack_name, attack in attacks: + print(f"\nRunning {attack_name} Attack...") + trained_surrogate, _, _ = attack.attack(num_queries) + surrogate_accuracy = evaluate_model(trained_surrogate, data) 
+ print(f"{attack_name} Attack - Surrogate Model Accuracy: {surrogate_accuracy:.4f}") + +def main(): + # Set up device device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - - # Load dataset and create/load victim model - dataset, data, victim_model = load_dataset_and_create_victim_model(args.dataset_name, device) - - if args.victim_model_path: - try: - victim_model.load_state_dict(torch.load(args.victim_model_path, map_location=device)) - print(f"Loaded custom victim model from {args.victim_model_path}") - except FileNotFoundError: - print(f"Error: Victim model file not found at {args.victim_model_path}") - print("Training a new victim model instead...") - train_victim_model(victim_model, data, args.dataset_name) - else: - print("Training victim model...") - train_victim_model(victim_model, data, args.dataset_name) - # Set default parameters - params = { - 'noise_dim': 32, - 'num_nodes': 500, - 'hidden_dim': 16, - 'generator_type': 'cosine', - 'generator_lr': 1e-6, - 'surrogate_lr': 0.001, - 'n_generator_steps': 2, - 'n_surrogate_steps': 5, - 'num_queries': 100 - } + # Load Cora dataset + dataset = Planetoid(root='/tmp/Cora', name='Cora', transform=NormalizeFeatures()) + data = dataset[0].to(device) - # Allow user to tweak parameters - print("\nCurrent parameters:") - for key, value in params.items(): - print(f"{key}: {value}") - - change_params = input("\nDo you want to change any parameters? 
(y/n): ").lower() == 'y' - if change_params: - for key in params: - new_value = input(f"Enter new value for {key} (press Enter to keep current value): ") - if new_value: - params[key] = type(params[key])(new_value) + # Create and train victim model + victim_model = create_victim_model_cora().to(device) + train_victim_model(victim_model, data) - print("\nRunning attack with the following parameters:") - for key, value in params.items(): - print(f"{key}: {value}") + # Evaluate victim model + victim_accuracy = evaluate_model(victim_model, data) + print(f"Victim Model Accuracy: {victim_accuracy:.4f}") - accuracy, fidelity = run_attack(args.attack_type, args.dataset_name, victim_model, data, dataset, device, params) - - print(f"\nResults for Type {args.attack_type} Attack on {args.dataset_name} dataset:") - print(f"Accuracy: {accuracy:.4f}") - print(f"Fidelity: {fidelity:.4f}") + # Run attacks + run_attacks(victim_model, data, device) if __name__ == "__main__": main() From 42d7327c115dce9fbcc79d196077bb1185e3942a Mon Sep 17 00:00:00 2001 From: Tyler Blalock <122387381+T-Breezy444@users.noreply.github.com> Date: Sat, 12 Oct 2024 17:39:38 -0400 Subject: [PATCH 15/24] Delete pygip/data_free_attack/stealgnn.py --- pygip/data_free_attack/stealgnn.py | 294 ----------------------------- 1 file changed, 294 deletions(-) delete mode 100644 pygip/data_free_attack/stealgnn.py diff --git a/pygip/data_free_attack/stealgnn.py b/pygip/data_free_attack/stealgnn.py deleted file mode 100644 index 3cbabf7..0000000 --- a/pygip/data_free_attack/stealgnn.py +++ /dev/null @@ -1,294 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F -import torch.optim as optim -from torch_geometric.nn import GCNConv -from torch_geometric.utils import to_dense_adj -import numpy as np -from tqdm import tqdm - -class GraphGenerator(nn.Module): - def __init__(self, noise_dim, num_nodes, feature_dim, generator_type='cosine', threshold=0.1): - super(GraphGenerator, self).__init__() - 
self.noise_dim = noise_dim - self.num_nodes = num_nodes - self.feature_dim = feature_dim - self.generator_type = generator_type - self.threshold = threshold - - self.feature_gen = nn.Sequential( - nn.Linear(noise_dim, 128), - nn.ReLU(), - nn.Linear(128, 256), - nn.ReLU(), - nn.Linear(256, num_nodes * feature_dim), - nn.Tanh() - ) - - if generator_type == 'full_param': - self.structure_gen = nn.Sequential( - nn.Linear(noise_dim, 128), - nn.ReLU(), - nn.Linear(128, 256), - nn.ReLU(), - nn.Linear(256, num_nodes * num_nodes), - nn.Sigmoid() - ) - - def forward(self, z): - features = self.feature_gen(z).view(self.num_nodes, self.feature_dim) - - if self.generator_type == 'cosine': - adj = self.cosine_similarity_generator(features) - elif self.generator_type == 'full_param': - adj = self.full_param_generator(z) - else: - raise ValueError("Invalid generator type. Choose 'cosine' or 'full_param'.") - - adj = adj / adj.sum(1, keepdim=True).clamp(min=1) - - return features, adj - - def cosine_similarity_generator(self, features): - norm_features = F.normalize(features, p=2, dim=1) - adj = torch.mm(norm_features, norm_features.t()) - adj = (adj > self.threshold).float() - adj = adj * (1 - torch.eye(self.num_nodes, device=adj.device)) - return adj - - def full_param_generator(self, z): - adj = self.structure_gen(z).view(self.num_nodes, self.num_nodes) - adj = (adj + adj.t()) / 2 - adj = adj * (1 - torch.eye(self.num_nodes, device=adj.device)) - return adj - - def adj_to_edge_index(self, adj): - return adj.nonzero().t() - -class SurrogateModel(nn.Module): - def __init__(self, input_dim, hidden_dim, output_dim, num_layers=2): - super(SurrogateModel, self).__init__() - self.convs = nn.ModuleList() - self.convs.append(GCNConv(input_dim, hidden_dim)) - - for _ in range(num_layers - 2): - self.convs.append(GCNConv(hidden_dim, hidden_dim)) - - self.convs.append(GCNConv(hidden_dim, output_dim)) - - def forward(self, x, edge_index): - for i, conv in enumerate(self.convs[:-1]): - x = 
conv(x, edge_index) - x = F.relu(x) - x = F.dropout(x, p=0.5, training=self.training) - - x = self.convs[-1](x, edge_index) - return F.log_softmax(x, dim=1) - -class STEALGNN: - def __init__(self, generator, surrogate_model, victim_model, device, - noise_dim, num_nodes, feature_dim, - generator_lr=1e-6, surrogate_lr=0.001, - n_generator_steps=2, n_surrogate_steps=5): - self.generator = generator - self.surrogate_model = surrogate_model - self.victim_model = victim_model - self.device = device - self.noise_dim = noise_dim - self.num_nodes = num_nodes - self.feature_dim = feature_dim - - self.generator_optimizer = optim.Adam(self.generator.parameters(), lr=generator_lr) - self.surrogate_optimizer = optim.Adam(self.surrogate_model.parameters(), lr=surrogate_lr) - - self.criterion = nn.CrossEntropyLoss() - self.n_generator_steps = n_generator_steps - self.n_surrogate_steps = n_surrogate_steps - - def generate_graph(self): - z = torch.randn(1, self.noise_dim).to(self.device) - features, adj = self.generator(z) - edge_index = self.generator.adj_to_edge_index(adj) - return features, edge_index - - def train_surrogate(self): - self.generator.eval() - self.surrogate_model.train() - - total_loss = 0 - for _ in range(self.n_surrogate_steps): - self.surrogate_optimizer.zero_grad() - - features, edge_index = self.generate_graph() - - with torch.no_grad(): - victim_output = self.victim_model(features, edge_index) - surrogate_output = self.surrogate_model(features, edge_index) - - loss = self.criterion(surrogate_output, victim_output.argmax(dim=1)) - - loss.backward() - self.surrogate_optimizer.step() - - total_loss += loss.item() - - return total_loss / self.n_surrogate_steps - - def attack(self, num_queries): - generator_losses = [] - surrogate_losses = [] - - pbar = tqdm(range(num_queries), desc=f"Running {self.__class__.__name__}") - for _ in pbar: - gen_loss = self.train_generator() - surr_loss = self.train_surrogate() - - generator_losses.append(gen_loss) - 
surrogate_losses.append(surr_loss) - - pbar.set_postfix({'Gen Loss': f"{gen_loss:.4f}", 'Surr Loss': f"{surr_loss:.4f}"}) - - return self.surrogate_model, generator_losses, surrogate_losses - -class TypeIAttack(STEALGNN): - def train_generator(self): - self.generator.train() - self.surrogate_model.eval() - - total_loss = 0 - for _ in range(self.n_generator_steps): - self.generator_optimizer.zero_grad() - - features, edge_index = self.generate_graph() - - with torch.no_grad(): - victim_output = self.victim_model(features, edge_index) - surrogate_output = self.surrogate_model(features, edge_index) - - loss = -self.criterion(surrogate_output, victim_output.argmax(dim=1)) - - epsilon = 1e-6 - num_directions = 2 - estimated_gradient = torch.zeros_like(features) - - for _ in range(num_directions): - u = torch.randn_like(features) - perturbed_features = features + epsilon * u - - with torch.no_grad(): - perturbed_victim_output = self.victim_model(perturbed_features, edge_index) - perturbed_surrogate_output = self.surrogate_model(perturbed_features, edge_index) - perturbed_loss = -self.criterion(perturbed_surrogate_output, perturbed_victim_output.argmax(dim=1)) - - estimated_gradient += (perturbed_loss - loss) / epsilon * u - - estimated_gradient /= num_directions - features.grad = estimated_gradient - - self.generator_optimizer.step() - total_loss += loss.item() - - return total_loss / self.n_generator_steps - -class TypeIIAttack(STEALGNN): - def train_generator(self): - self.generator.train() - self.surrogate_model.eval() - - total_loss = 0 - for _ in range(self.n_generator_steps): - self.generator_optimizer.zero_grad() - - features, edge_index = self.generate_graph() - - with torch.no_grad(): - victim_output = self.victim_model(features, edge_index) - surrogate_output = self.surrogate_model(features, edge_index) - - loss = -self.criterion(surrogate_output, victim_output.argmax(dim=1)) - loss.backward() - - self.generator_optimizer.step() - total_loss += loss.item() - - 
return total_loss / self.n_generator_steps - -class TypeIIIAttack(STEALGNN): - def __init__(self, generator, surrogate_model1, surrogate_model2, victim_model, device, - noise_dim, num_nodes, feature_dim, - generator_lr=1e-6, surrogate_lr=0.001, - n_generator_steps=2, n_surrogate_steps=5): - super().__init__(generator, surrogate_model1, victim_model, device, - noise_dim, num_nodes, feature_dim, - generator_lr, surrogate_lr, - n_generator_steps, n_surrogate_steps) - self.surrogate_model2 = surrogate_model2 - self.surrogate_optimizer2 = optim.Adam(self.surrogate_model2.parameters(), lr=surrogate_lr) - - def train_generator(self): - self.generator.train() - self.surrogate_model.eval() - self.surrogate_model2.eval() - - total_loss = 0 - for _ in range(self.n_generator_steps): - self.generator_optimizer.zero_grad() - - features, edge_index = self.generate_graph() - - surrogate_output1 = self.surrogate_model(features, edge_index) - surrogate_output2 = self.surrogate_model2(features, edge_index) - - loss = -torch.mean(torch.std(torch.stack([surrogate_output1, surrogate_output2]), dim=0)) - loss.backward() - - self.generator_optimizer.step() - total_loss += loss.item() - - return total_loss / self.n_generator_steps - - def train_surrogate(self): - self.generator.eval() - self.surrogate_model.train() - self.surrogate_model2.train() - - total_loss = 0 - for _ in range(self.n_surrogate_steps): - self.surrogate_optimizer.zero_grad() - self.surrogate_optimizer2.zero_grad() - - features, edge_index = self.generate_graph() - - with torch.no_grad(): - victim_output = self.victim_model(features, edge_index) - surrogate_output1 = self.surrogate_model(features, edge_index) - surrogate_output2 = self.surrogate_model2(features, edge_index) - - loss1 = self.criterion(surrogate_output1, victim_output.argmax(dim=1)) - loss2 = self.criterion(surrogate_output2, victim_output.argmax(dim=1)) - - combined_loss = loss1 + loss2 - combined_loss.backward() - - self.surrogate_optimizer.step() - 
self.surrogate_optimizer2.step() - - total_loss += combined_loss.item() / 2 - - return total_loss / self.n_surrogate_steps - -def evaluate_models(victim_model, surrogate_model, data): - victim_model.eval() - surrogate_model.eval() - - with torch.no_grad(): - victim_out = victim_model(data.x, data.edge_index) - surrogate_out = surrogate_model(data.x, data.edge_index) - - victim_preds = victim_out.argmax(dim=1) - surrogate_preds = surrogate_out.argmax(dim=1) - - accuracy = (surrogate_preds[data.test_mask] == data.y[data.test_mask]).float().mean().item() - fidelity = (surrogate_preds[data.test_mask] == victim_preds[data.test_mask]).float().mean().item() - - return accuracy, fidelity From 0c088e5dd956200f927f802be78d45c7317c23c5 Mon Sep 17 00:00:00 2001 From: Tyler Blalock <122387381+T-Breezy444@users.noreply.github.com> Date: Fri, 25 Oct 2024 13:58:18 -0400 Subject: [PATCH 16/24] Update README.md --- pygip/data_free_attack/README.md | 206 +++++++------------------------ 1 file changed, 44 insertions(+), 162 deletions(-) diff --git a/pygip/data_free_attack/README.md b/pygip/data_free_attack/README.md index bfbed18..830d755 100644 --- a/pygip/data_free_attack/README.md +++ b/pygip/data_free_attack/README.md @@ -1,180 +1,62 @@ -# STEALGNN: Graph Neural Network Model Extraction +# Data-free Model Extraction Attacks -This repository contains an implementation of STEALGNN, a framework for data-free model extraction attacks on Graph Neural Networks (GNNs). +This directory contains an implementation of data-free model extraction attacks on Graph Neural Networks (GNNs). ## Files -1. `stealgnn.py`: Core implementation of the STEALGNN framework -2. `example.py`: Interactive script to run STEALGNN attacks +1. `example.py`: Example script demonstrating how to run data-free attacks +2. `models/`: + - `generator.py`: Graph generator implementation + - `victim.py`: Victim model implementations +3. 
`attacks/`: + - `attack1.py`: Type I Attack implementation + - `attack2.py`: Type II Attack implementation + - `attack3.py`: Type III Attack implementation -## stealgnn.py +## Running Data-free Attacks -This file contains the main implementation of the STEALGNN framework, including: +The `example.py` script provides a complete example of how to run data-free attacks on GNN models. Here's how to use it: -- `GraphGenerator`: Generates synthetic graphs for the attack -- `SurrogateModel`: The model used to imitate the victim GNN -- `STEALGNN`: Base class for all attack types -- `TypeIAttack`, `TypeIIAttack`, `TypeIIIAttack`: Specific implementations of each attack type -- `evaluate_models`: Function to evaluate the performance of the attacks - -## example.py - -This interactive script demonstrates how to use the STEALGNN framework. It allows users to: - -1. Choose a dataset (Cora, Computers, PubMed, or OGB-Arxiv) -2. Select an attack type (1, 2, or 3) -3. Optionally load a pre-trained victim model -4. Customize attack parameters -5. Run the attack and view results - -### Usage - -Run the script using the following command: - -``` -python example.py [--victim_model_path ] +```bash +python example.py ``` -For example: +### Example Script Structure -``` -python example.py 1 cora -python example.py 2 pubmed --victim_model_path /path/to/custom_model.pth -``` - -## Running Experiments from the Original STEALGNN Paper - -To exactly replicate the experiments from the original STEALGNN paper, use the following commands and parameters: +The example script demonstrates: +1. Loading a dataset (Cora in this example) +2. Creating and training a victim model +3. Setting up attack parameters +4. 
Running a Type I attack -### Ensuring Correct Victim Model Architecture - -Before running the experiments, modify the `load_dataset_and_create_victim_model` function in `example.py` to use the correct architecture for each dataset: +### Customizing Attack Parameters +The example shows the following default parameters which can be modified: ```python -def load_dataset_and_create_victim_model(dataset_name, device): - # ... (existing code for loading datasets) - - if dataset_name in ['cora', 'pubmed', 'computers']: - victim_model = GCN(input_dim, hidden_dim=64, output_dim=dataset.num_classes, num_layers=2).to(device) - elif dataset_name == 'ogb-arxiv': - victim_model = GCN(input_dim, hidden_dim=256, output_dim=dataset.num_classes, num_layers=3).to(device) - else: - raise ValueError("Invalid dataset name") - - return dataset, data, victim_model - -class GCN(torch.nn.Module): - def __init__(self, input_dim, hidden_dim, output_dim, num_layers): - super(GCN, self).__init__() - self.convs = torch.nn.ModuleList() - self.convs.append(GCNConv(input_dim, hidden_dim)) - for _ in range(num_layers - 2): - self.convs.append(GCNConv(hidden_dim, hidden_dim)) - self.convs.append(GCNConv(hidden_dim, output_dim)) - - def forward(self, x, edge_index): - for conv in self.convs[:-1]: - x = conv(x, edge_index) - x = F.relu(x) - x = F.dropout(x, p=0.5, training=self.training) - return self.convs[-1](x, edge_index) +# Attack parameters +noise_dim = 32 +num_nodes = 500 +num_queries = 300 +generator_lr = 1e-6 +surrogate_lr = 0.001 +n_generator_steps = 2 +n_surrogate_steps = 5 ``` -### Running Experiments - -1. 
For Cora dataset: - ``` - python example.py 1 cora - python example.py 2 cora - python example.py 3 cora - ``` - Parameters to change: - - `noise_dim`: 32 - - `num_nodes`: 2485 (Cora's original node count) - - `hidden_dim`: 64 - - `generator_type`: 'cosine' - - `generator_lr`: 1e-6 - - `surrogate_lr`: 0.001 - - `n_generator_steps`: 2 - - `n_surrogate_steps`: 5 - - `num_queries`: 700 - -2. For PubMed dataset: - ``` - python example.py 1 pubmed - python example.py 2 pubmed - python example.py 3 pubmed - ``` - Parameters to change: - - `noise_dim`: 32 - - `num_nodes`: 19717 (PubMed's original node count) - - `hidden_dim`: 64 - - `generator_type`: 'cosine' - - `generator_lr`: 1e-6 - - `surrogate_lr`: 0.001 - - `n_generator_steps`: 2 - - `n_surrogate_steps`: 5 - - `num_queries`: 700 - -3. For Amazon Computers dataset: - ``` - python example.py 1 computers - python example.py 2 computers - python example.py 3 computers - ``` - Parameters to change: - - `noise_dim`: 32 - - `num_nodes`: 13381 (Amazon Computers' original node count) - - `hidden_dim`: 64 - - `generator_type`: 'cosine' - - `generator_lr`: 1e-6 - - `surrogate_lr`: 0.001 - - `n_generator_steps`: 2 - - `n_surrogate_steps`: 5 - - `num_queries`: 700 - -4. For OGB-Arxiv dataset: - ``` - python example.py 1 ogb-arxiv - python example.py 2 ogb-arxiv - python example.py 3 ogb-arxiv - ``` - Parameters to change: - - `noise_dim`: 32 - - `num_nodes`: 169343 (OGB-Arxiv's original node count) - - `hidden_dim`: 128 - - `generator_type`: 'cosine' - - `generator_lr`: 1e-6 - - `surrogate_lr`: 0.001 - - `n_generator_steps`: 2 - - `n_surrogate_steps`: 5 - - `num_queries`: 700 - -For each dataset, run all three attack types (1, 2, and 3) to compare their performance. +### Running Different Attack Types -When prompted to change parameters in the interactive script, make sure to input the values listed above for each dataset to exactly replicate the paper's experiments. +The example includes code for all three attack types. 
To run different attacks, simply uncomment the desired attack in the `attacks` list: -## Additional Notes - -- The experiments use PyTorch Geometric for graph operations and model implementations. -- Adam optimizer is used for both the generator and surrogate model training. -- Run each experiment multiple times (5 times in the original paper) to account for randomness, and average the results for a more robust comparison. -- Ensure that your Python environment has all necessary dependencies installed, including PyTorch, PyTorch Geometric, and OGB (for the OGB-Arxiv dataset). - -## Customizing Experiments - -When running `example.py`, you'll be prompted to modify the default parameters. You can experiment with different values for: - -- `noise_dim`: Dimension of the noise vector for graph generation -- `num_nodes`: Number of nodes in the generated graphs -- `hidden_dim`: Hidden dimension size for the surrogate model -- `generator_type`: Type of graph generator ('cosine' or 'full_param') -- `generator_lr`: Learning rate for the generator -- `surrogate_lr`: Learning rate for the surrogate model -- `n_generator_steps`: Number of generator training steps per iteration -- `n_surrogate_steps`: Number of surrogate model training steps per iteration -- `num_queries`: Total number of queries to the victim model - -Adjust these parameters to explore their impact on the attack performance. +```python +attacks = [ + ("Type I", TypeIAttack(generator, surrogate_model, victim_model, device, + noise_dim, num_nodes, feature_dim, generator_lr, surrogate_lr, + n_generator_steps, n_surrogate_steps)), + # Uncomment to run Type II attack + # ("Type II", TypeIIAttack(...)), + # Uncomment to run Type III attack + # ("Type III", TypeIIIAttack(...)) +] +``` -By following this README, you should be able to replicate the experiments from the original STEALGNN paper and conduct your own experiments using this implementation. 
+The example script provides a template that can be easily modified to run attacks with different parameters or on different models. See the commented code in the script for Type II and Type III attack implementations. From 861da43cf5bd5cecf1c433920cc19c7f912bf20e Mon Sep 17 00:00:00 2001 From: Tyler Blalock <122387381+T-Breezy444@users.noreply.github.com> Date: Fri, 25 Oct 2024 14:03:45 -0400 Subject: [PATCH 17/24] Update README.md --- pygip/data_free_attack/README.md | 70 +++++++++++++------------------- 1 file changed, 29 insertions(+), 41 deletions(-) diff --git a/pygip/data_free_attack/README.md b/pygip/data_free_attack/README.md index 830d755..68a84e8 100644 --- a/pygip/data_free_attack/README.md +++ b/pygip/data_free_attack/README.md @@ -4,7 +4,7 @@ This directory contains an implementation of data-free model extraction attacks ## Files -1. `example.py`: Example script demonstrating how to run data-free attacks +1. `example.py`: Interactive script demonstrating how to run data-free attacks 2. `models/`: - `generator.py`: Graph generator implementation - `victim.py`: Victim model implementations @@ -15,48 +15,36 @@ This directory contains an implementation of data-free model extraction attacks ## Running Data-free Attacks -The `example.py` script provides a complete example of how to run data-free attacks on GNN models. Here's how to use it: +The `example.py` script provides an interactive way to run data-free attacks on GNN models. Here's how to use it: ```bash python example.py ``` -### Example Script Structure - -The example script demonstrates: -1. Loading a dataset (Cora in this example) -2. Creating and training a victim model -3. Setting up attack parameters -4. 
Running a Type I attack - -### Customizing Attack Parameters - -The example shows the following default parameters which can be modified: -```python -# Attack parameters -noise_dim = 32 -num_nodes = 500 -num_queries = 300 -generator_lr = 1e-6 -surrogate_lr = 0.001 -n_generator_steps = 2 -n_surrogate_steps = 5 -``` - -### Running Different Attack Types - -The example includes code for all three attack types. To run different attacks, simply uncomment the desired attack in the `attacks` list: - -```python -attacks = [ - ("Type I", TypeIAttack(generator, surrogate_model, victim_model, device, - noise_dim, num_nodes, feature_dim, generator_lr, surrogate_lr, - n_generator_steps, n_surrogate_steps)), - # Uncomment to run Type II attack - # ("Type II", TypeIIAttack(...)), - # Uncomment to run Type III attack - # ("Type III", TypeIIIAttack(...)) -] -``` - -The example script provides a template that can be easily modified to run attacks with different parameters or on different models. See the commented code in the script for Type II and Type III attack implementations. +When you run the script, it will: +1. Load the Cora dataset +2. Create and train a victim model +3. Prompt you to choose an attack type: + ``` + Choose attack type (1, 2, or 3): + ``` +4. Run the selected attack with the following default parameters: + ```python + noise_dim = 32 + num_nodes = 500 + num_queries = 300 + generator_lr = 1e-6 + surrogate_lr = 0.001 + n_generator_steps = 2 + n_surrogate_steps = 5 + ``` + +### Attack Types + +1. Type I Attack: Basic model extraction attack +2. Type II Attack: Enhanced extraction with improved query strategy +3. Type III Attack: Advanced extraction with additional model architecture considerations + +Choose the attack type by entering the corresponding number (1, 2, or 3) when prompted. + +The script will display the progress of the victim model training and the final accuracy of both the victim and surrogate models. 
From 0e84e61738e737e223c0e216296791759d08e22e Mon Sep 17 00:00:00 2001 From: Tyler Blalock <122387381+T-Breezy444@users.noreply.github.com> Date: Fri, 25 Oct 2024 14:06:17 -0400 Subject: [PATCH 18/24] Update README.md --- pygip/data_free_attack/README.md | 38 +++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/pygip/data_free_attack/README.md b/pygip/data_free_attack/README.md index 68a84e8..2d14ab8 100644 --- a/pygip/data_free_attack/README.md +++ b/pygip/data_free_attack/README.md @@ -47,4 +47,40 @@ When you run the script, it will: Choose the attack type by entering the corresponding number (1, 2, or 3) when prompted. -The script will display the progress of the victim model training and the final accuracy of both the victim and surrogate models. +### Sample Output + +``` +Epoch 10/200, Train Loss: 1.7342, Val Loss: 1.8183, Val Acc: 0.7460 +Epoch 20/200, Train Loss: 1.3186, Val Loss: 1.5902, Val Acc: 0.7860 +Epoch 30/200, Train Loss: 0.8908, Val Loss: 1.3175, Val Acc: 0.7880 +Epoch 40/200, Train Loss: 0.5930, Val Loss: 1.0948, Val Acc: 0.7860 +Epoch 50/200, Train Loss: 0.4184, Val Loss: 0.9633, Val Acc: 0.7940 +Epoch 60/200, Train Loss: 0.3414, Val Loss: 0.8969, Val Acc: 0.7900 +Epoch 70/200, Train Loss: 0.2943, Val Loss: 0.8568, Val Acc: 0.7900 +Epoch 80/200, Train Loss: 0.2577, Val Loss: 0.8343, Val Acc: 0.7940 +Epoch 90/200, Train Loss: 0.2487, Val Loss: 0.8058, Val Acc: 0.7960 +Epoch 100/200, Train Loss: 0.2310, Val Loss: 0.7731, Val Acc: 0.7880 +Epoch 110/200, Train Loss: 0.2129, Val Loss: 0.7825, Val Acc: 0.7900 +Epoch 120/200, Train Loss: 0.2092, Val Loss: 0.7696, Val Acc: 0.7920 +Epoch 130/200, Train Loss: 0.1865, Val Loss: 0.7548, Val Acc: 0.7940 +Epoch 140/200, Train Loss: 0.1748, Val Loss: 0.7522, Val Acc: 0.7960 +Epoch 150/200, Train Loss: 0.1769, Val Loss: 0.7385, Val Acc: 0.7940 +Epoch 160/200, Train Loss: 0.1682, Val Loss: 0.7552, Val Acc: 0.7920 +Epoch 170/200, Train Loss: 0.1557, Val Loss: 0.7254, Val 
Acc: 0.7880 +Epoch 180/200, Train Loss: 0.1608, Val Loss: 0.7346, Val Acc: 0.7940 +Epoch 190/200, Train Loss: 0.1517, Val Loss: 0.7433, Val Acc: 0.7860 +Epoch 200/200, Train Loss: 0.1482, Val Loss: 0.7290, Val Acc: 0.7940 +Victim Model Accuracy: 0.8070 + +Choose attack type (1, 2, or 3): 2 + +Running Type II Attack... +Attacking: 100%|██████████████████████████████| 300/300 [01:09<00:00, 4.29it/s, Gen Loss=-0.3422, Surr Loss=0.4532] +Type II Attack - Surrogate Model Accuracy: 0.8090 +``` + +The script will display: +1. Training progress of the victim model, showing loss and validation accuracy +2. Final victim model accuracy +3. Progress bar during the attack +4. Final surrogate model accuracy From 09583f9ef10c707d45833a7e8adf620413b81c16 Mon Sep 17 00:00:00 2001 From: Tyler Blalock <122387381+T-Breezy444@users.noreply.github.com> Date: Fri, 25 Oct 2024 14:19:25 -0400 Subject: [PATCH 19/24] Update example.py --- pygip/data_free_attack/example.py | 85 ++++++++++++++++++++----------- 1 file changed, 55 insertions(+), 30 deletions(-) diff --git a/pygip/data_free_attack/example.py b/pygip/data_free_attack/example.py index 9b2cc77..b17fc99 100644 --- a/pygip/data_free_attack/example.py +++ b/pygip/data_free_attack/example.py @@ -3,7 +3,6 @@ import torch.optim as optim from torch_geometric.datasets import Planetoid from torch_geometric.transforms import NormalizeFeatures - from models.generator import GraphGenerator from models.victim import create_victim_model_cora from attacks.attack1 import TypeIAttack @@ -29,22 +28,36 @@ def train_victim_model(model, data, epochs=200, lr=0.01, weight_decay=5e-4): model.train() print(f'Epoch {epoch+1}/{epochs}, Train Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}, Val Acc: {val_acc.item():.4f}') -def evaluate_model(model, data): - model.eval() - with torch.no_grad(): - out = model(data.x, data.edge_index) - pred = out.argmax(dim=1) - correct = pred[data.test_mask] == data.y[data.test_mask] - accuracy = int(correct.sum()) / 
int(data.test_mask.sum()) +def evaluate_model(model_output, data): + if isinstance(model_output, tuple): + # Handle case where we have two surrogate models + model1, model2 = model_output + model1.eval() + model2.eval() + with torch.no_grad(): + # Get predictions from both models + out1 = model1(data.x, data.edge_index) + out2 = model2(data.x, data.edge_index) + # Average the predictions + out = (out1 + out2) / 2 + pred = out.argmax(dim=1) + else: + # Handle single model case + model_output.eval() + with torch.no_grad(): + out = model_output(data.x, data.edge_index) + pred = out.argmax(dim=1) + + correct = pred[data.test_mask] == data.y[data.test_mask] + accuracy = int(correct.sum()) / int(data.test_mask.sum()) return accuracy -def run_attacks(victim_model, data, device): +def run_attack(victim_model, data, device, attack_type): # Initialize generator and surrogate model noise_dim = 32 num_nodes = 500 feature_dim = data.num_features output_dim = data.y.max().item() + 1 # Calculate number of classes - generator = GraphGenerator(noise_dim, num_nodes, feature_dim, generator_type='cosine').to(device) surrogate_model = create_victim_model_cora().to(device) @@ -55,25 +68,26 @@ def run_attacks(victim_model, data, device): n_generator_steps = 2 n_surrogate_steps = 5 - # Run attacks - attacks = [ - ("Type I", TypeIAttack(generator, surrogate_model, victim_model, device, - noise_dim, num_nodes, feature_dim, generator_lr, surrogate_lr, - n_generator_steps, n_surrogate_steps))] - ''' - ("Type II", TypeIIAttack(generator, surrogate_model, victim_model, device, - noise_dim, num_nodes, feature_dim, generator_lr, surrogate_lr, - n_generator_steps, n_surrogate_steps)), - ("Type III", TypeIIIAttack(generator, surrogate_model, create_victim_model_cora().to(device), - victim_model, device, noise_dim, num_nodes, feature_dim, generator_lr, surrogate_lr, - n_generator_steps, n_surrogate_steps)) - ''' + if attack_type == 1: + attack = TypeIAttack(generator, surrogate_model, victim_model, 
device, + noise_dim, num_nodes, feature_dim, generator_lr, surrogate_lr, + n_generator_steps, n_surrogate_steps) + elif attack_type == 2: + attack = TypeIIAttack(generator, surrogate_model, victim_model, device, + noise_dim, num_nodes, feature_dim, generator_lr, surrogate_lr, + n_generator_steps, n_surrogate_steps) + elif attack_type == 3: + surrogate_model2 = create_victim_model_cora().to(device) + attack = TypeIIIAttack(generator, surrogate_model, surrogate_model2, victim_model, device, + noise_dim, num_nodes, feature_dim, generator_lr, surrogate_lr, + n_generator_steps, n_surrogate_steps) + else: + raise ValueError("Invalid attack type. Please choose 1, 2, or 3.") - for attack_name, attack in attacks: - print(f"\nRunning {attack_name} Attack...") - trained_surrogate, _, _ = attack.attack(num_queries) - surrogate_accuracy = evaluate_model(trained_surrogate, data) - print(f"{attack_name} Attack - Surrogate Model Accuracy: {surrogate_accuracy:.4f}") + print(f"\nRunning Type {attack_type} Attack...") + trained_surrogate, _, _ = attack.attack(num_queries) + surrogate_accuracy = evaluate_model(trained_surrogate, data) + print(f"Type {attack_type} Attack - Surrogate Model Accuracy: {surrogate_accuracy:.4f}") def main(): # Set up device @@ -91,8 +105,19 @@ def main(): victim_accuracy = evaluate_model(victim_model, data) print(f"Victim Model Accuracy: {victim_accuracy:.4f}") - # Run attacks - run_attacks(victim_model, data, device) + # Get attack type from user + while True: + try: + attack_type = int(input("\nChoose attack type (1, 2, or 3): ")) + if attack_type in [1, 2, 3]: + break + else: + print("Please enter 1, 2, or 3.") + except ValueError: + print("Please enter a valid number (1, 2, or 3).") + + # Run selected attack + run_attack(victim_model, data, device, attack_type) if __name__ == "__main__": main() From 59863de92b320aaca84915ce3ee6b9fec3dc3487 Mon Sep 17 00:00:00 2001 From: Tyler Blalock <122387381+T-Breezy444@users.noreply.github.com> Date: Fri, 25 Oct 
2024 14:48:36 -0400 Subject: [PATCH 20/24] Update requirements.txt --- requirements.txt | 68 ++++++++++++++++-------------------------------- 1 file changed, 23 insertions(+), 45 deletions(-) diff --git a/requirements.txt b/requirements.txt index 8d8936d..93cb5aa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,67 +1,45 @@ -# System packages (commented out as they're typically managed by the system) # bzip2==1.0.8 # ca-certificates==2024.7.2 # libffi==3.4.4 # ncurses==6.4 # openssl==3.0.14 +# pip==24.0 +# python==3.10.14 # readline==8.2 +# setuptools==69.5.1 # sqlite==3.45.3 # tk==8.6.14 +# wheel==0.43.0 # xz==5.4.6 # zlib==1.2.13 - -# Python and pip -python==3.10.14 -pip==24.0 -setuptools==69.5.1 -wheel==0.43.0 - -# Core libraries torch==2.3.0 -numpy==2.0.1 -pandas==2.2.2 -scipy==1.14.0 - -# Graph-related libraries -torch-geometric==2.5.0 -networkx==3.3 -ogb==1.3.6 -dgl==2.2.1 - -# Utility libraries -tqdm==4.66.4 -pyyaml==6.0.1 -requests==2.32.3 -fsspec==2024.6.1 -psutil==6.0.0 - -# Data handling and processing -torchdata==0.7.1 -python-dateutil==2.9.0.post0 -pytz==2024.1 -tzdata==2024.1 - -# Type checking and extensions -typing-extensions==4.12.2 annotated-types==0.7.0 -pydantic==2.8.2 -pydantic-core==2.20.1 - -# Other dependencies certifi==2024.7.4 charset-normalizer==3.3.2 filelock==3.15.4 +fsspec==2024.6.1 idna==3.7 jinja2==3.1.4 markupsafe==2.1.5 mpmath==1.3.0 +networkx==3.3 +numpy==2.0.1 +pandas==2.2.2 +psutil==6.0.0 +pydantic==2.8.2 +pydantic-core==2.20.1 +python-dateutil==2.9.0.post0 +pytz==2024.1 +pyyaml==6.0.1 +requests==2.32.3 +scipy==1.14.0 six==1.16.0 sympy==1.13.1 +torchdata==0.7.1 +tqdm==4.66.4 +typing-extensions==4.12.2 +tzdata==2024.1 urllib3==2.2.2 - -# Additional useful libraries for ML projects -scikit-learn==1.3.0 -matplotlib==3.7.5 - -# Packaging -packaging==23.2 +dgl==2.2.1 +torch_geometric +packaging From 78215a2be798641965e10cb98370bc0219e4f7a2 Mon Sep 17 00:00:00 2001 From: Tyler Blalock 
<122387381+T-Breezy444@users.noreply.github.com> Date: Fri, 25 Oct 2024 15:08:17 -0400 Subject: [PATCH 21/24] Update requirements.txt --- requirements.txt | 68 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 23 deletions(-) diff --git a/requirements.txt b/requirements.txt index 93cb5aa..8d8936d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,45 +1,67 @@ +# System packages (commented out as they're typically managed by the system) # bzip2==1.0.8 # ca-certificates==2024.7.2 # libffi==3.4.4 # ncurses==6.4 # openssl==3.0.14 -# pip==24.0 -# python==3.10.14 # readline==8.2 -# setuptools==69.5.1 # sqlite==3.45.3 # tk==8.6.14 -# wheel==0.43.0 # xz==5.4.6 # zlib==1.2.13 + +# Python and pip +python==3.10.14 +pip==24.0 +setuptools==69.5.1 +wheel==0.43.0 + +# Core libraries torch==2.3.0 +numpy==2.0.1 +pandas==2.2.2 +scipy==1.14.0 + +# Graph-related libraries +torch-geometric==2.5.0 +networkx==3.3 +ogb==1.3.6 +dgl==2.2.1 + +# Utility libraries +tqdm==4.66.4 +pyyaml==6.0.1 +requests==2.32.3 +fsspec==2024.6.1 +psutil==6.0.0 + +# Data handling and processing +torchdata==0.7.1 +python-dateutil==2.9.0.post0 +pytz==2024.1 +tzdata==2024.1 + +# Type checking and extensions +typing-extensions==4.12.2 annotated-types==0.7.0 +pydantic==2.8.2 +pydantic-core==2.20.1 + +# Other dependencies certifi==2024.7.4 charset-normalizer==3.3.2 filelock==3.15.4 -fsspec==2024.6.1 idna==3.7 jinja2==3.1.4 markupsafe==2.1.5 mpmath==1.3.0 -networkx==3.3 -numpy==2.0.1 -pandas==2.2.2 -psutil==6.0.0 -pydantic==2.8.2 -pydantic-core==2.20.1 -python-dateutil==2.9.0.post0 -pytz==2024.1 -pyyaml==6.0.1 -requests==2.32.3 -scipy==1.14.0 six==1.16.0 sympy==1.13.1 -torchdata==0.7.1 -tqdm==4.66.4 -typing-extensions==4.12.2 -tzdata==2024.1 urllib3==2.2.2 -dgl==2.2.1 -torch_geometric -packaging + +# Additional useful libraries for ML projects +scikit-learn==1.3.0 +matplotlib==3.7.5 + +# Packaging +packaging==23.2 From 96a4d95d21c74121d8e75becb147474e14092cb7 Mon Sep 17 00:00:00 
2001 From: Tyler Blalock <122387381+T-Breezy444@users.noreply.github.com> Date: Fri, 25 Oct 2024 15:26:06 -0400 Subject: [PATCH 22/24] Update requirements.txt --- requirements.txt | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/requirements.txt b/requirements.txt index 8d8936d..45b5466 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,45 +9,27 @@ # tk==8.6.14 # xz==5.4.6 # zlib==1.2.13 - -# Python and pip -python==3.10.14 -pip==24.0 -setuptools==69.5.1 -wheel==0.43.0 - -# Core libraries +numpy>=1.23.5,<2.0.0 torch==2.3.0 -numpy==2.0.1 pandas==2.2.2 scipy==1.14.0 - -# Graph-related libraries torch-geometric==2.5.0 networkx==3.3 ogb==1.3.6 dgl==2.2.1 - -# Utility libraries tqdm==4.66.4 pyyaml==6.0.1 requests==2.32.3 fsspec==2024.6.1 psutil==6.0.0 - -# Data handling and processing torchdata==0.7.1 python-dateutil==2.9.0.post0 pytz==2024.1 tzdata==2024.1 - -# Type checking and extensions typing-extensions==4.12.2 annotated-types==0.7.0 pydantic==2.8.2 pydantic-core==2.20.1 - -# Other dependencies certifi==2024.7.4 charset-normalizer==3.3.2 filelock==3.15.4 @@ -58,10 +40,6 @@ mpmath==1.3.0 six==1.16.0 sympy==1.13.1 urllib3==2.2.2 - -# Additional useful libraries for ML projects scikit-learn==1.3.0 matplotlib==3.7.5 - -# Packaging packaging==23.2 From a13c4ab87329636adb64b37846f053a60ecef4d6 Mon Sep 17 00:00:00 2001 From: Tyler Blalock <122387381+T-Breezy444@users.noreply.github.com> Date: Fri, 25 Oct 2024 15:36:04 -0400 Subject: [PATCH 23/24] Update environment.yml --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index f81ef88..e90e3cd 100644 --- a/environment.yml +++ b/environment.yml @@ -27,7 +27,7 @@ dependencies: - markupsafe==2.1.5 - mpmath==1.3.0 - networkx==3.3 - - numpy==2.0.1 + - numpy>=1.23.5,<2.0.0 - pandas==2.2.2 - psutil==6.0.0 - pydantic==2.8.2 From 7fc0012ab3a2d2064a146a1b8179530274c42553 Mon Sep 17 00:00:00 2001 From: Tyler Blalock 
<122387381+T-Breezy444@users.noreply.github.com> Date: Fri, 25 Oct 2024 16:52:17 -0400 Subject: [PATCH 24/24] Update README.md Put together a guide from what I have learned from trying this library on 3 different machines --- README.md | 96 +++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 87 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 2e8261f..6fda9a7 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,105 @@ +# PyGIP Installation Guide + +PyGIP supports multiple CUDA versions and provides two installation methods. Choose the method that best suits your needs. + +## Method 1: Direct Installation + +Create and activate a new conda environment: ```bash -# pip install conda create -n pygip python=3.10.14 conda activate pygip -# if you use cuda 11.x +``` + +### Choose your CUDA version: + +#### For CUDA 11.x users: +```bash pip install pygip -f https://data.dgl.ai/wheels/torch-2.3/cu118/repo.html --extra-index-url https://download.pytorch.org/whl/cu118 -# if you use cuda 12.x -# pip install pygip -f https://data.dgl.ai/wheels/torch-2.3/cu121/repo.html --extra-index-url https://data.dgl.ai/wheels/torch-2.3/cu121/repo.html ``` +#### For CUDA 12.x users: +```bash +pip install pygip -f https://data.dgl.ai/wheels/torch-2.3/cu121/repo.html --extra-index-url https://download.pytorch.org/whl/cu121 +``` + +## Method 2: Environment Setup + +This method uses a predefined environment.yml file and is recommended for development: +1. Create and activate the environment: ```bash -# Simple setup. conda env create -f environment.yml -n pygip conda activate pygip -pip install dgl -f https://data.dgl.ai/wheels/repo.html #due to dgl issues, unfortunately we have to install this dgl 2.2.1 manually. +``` -# Under the GNNIP directory +2. Install DGL manually (required due to DGL 2.2.1 dependency issues): +```bash +pip install dgl -f https://data.dgl.ai/wheels/repo.html +``` + +3. 
Set up the Python path (run this from the PyGIP root directory): +```bash +# Linux/Mac: export PYTHONPATH=`pwd` -# Quick testing -python3 examples/examples.py +# Windows: +set PYTHONPATH=%cd% ``` +4. Test the installation: +```bash +python examples/examples.py +``` + +## Verifying CUDA Setup + +To verify your CUDA installation is working correctly: +```python +import torch +print("CUDA Available:", torch.cuda.is_available()) +print("CUDA Version:", torch.version.cuda) +print("GPU Device:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU found") +``` + +## Troubleshooting + +If you encounter CUDA-related issues: + +1. Ensure your NVIDIA drivers are up to date: +```bash +nvidia-smi +``` + +2. If you need to reinstall PyTorch with a specific CUDA version: +```bash +# Remove existing torch installation +pip uninstall torch torch-geometric -y + +# For CUDA 11.x: +pip install torch --index-url https://download.pytorch.org/whl/cu118 +pip install torch-geometric==2.5.0 + +# For CUDA 12.x: +pip install torch --index-url https://download.pytorch.org/whl/cu121 +pip install torch-geometric==2.5.0 +``` + +3. Verify DGL installation: +```bash +python -c "import dgl; print(dgl.__version__)" +``` + +## Requirements + +PyGIP has been tested with the following core dependencies: +- Python 3.10.14 +- PyTorch 2.3.0 +- torch-geometric 2.5.0 +- DGL 2.2.1 + +For a complete list of dependencies, see the `requirements.txt` file in the repository. + + # Attack ## Model Extraction Attacks against Graph Neural Network