3 changes: 3 additions & 0 deletions examples/examples.py
@@ -135,3 +135,6 @@ def test():
    elif (dataset_name == 3):
        defense = Watermark_sage(PubMed(), 0.25)
        defense.watermark_attack(PubMed(), attack_name, dataset_name)


test()
2 changes: 2 additions & 0 deletions new_code/attack/__init__.py
@@ -0,0 +1,2 @@
from .attack import *
from .target import *
116 changes: 116 additions & 0 deletions new_code/attack/attack.py
@@ -0,0 +1,116 @@
import os
import numpy as np
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import CosineAnnealingLR
from sklearn.metrics import roc_auc_score
th.manual_seed(0)

from model.mlp import MLP_ATTACK, MLP_ATTACK_PLUS, MLP_ATTACK_PLUS2, MLP_ATTACK_ALL

def _weights_init_normal(m):
    # Initialize Linear layers with N(0, 1/in_features) weights and zero bias
    classname = m.__class__.__name__
    if classname.find('Linear') != -1:
        y = m.in_features
        m.weight.data.normal_(0.0, 1/np.sqrt(y))
        m.bias.data.fill_(0)

def save_attack_model(args, model):
    if not os.path.exists(args.attack_model_save_path):
        os.makedirs(args.attack_model_save_path)
    save_name = os.path.join(args.attack_model_save_path, f'{args.attack_model_prefix}_{args.dataset}_{args.target_model}_{args.shadow_model}_{args.node_topology}_{args.feature}_{args.edge_feature}.pth')
    th.save(model.state_dict(), save_name)
    print(f"Finished training, saved model to {save_name}")
    return save_name  # return the checkpoint path so callers can reload it

def load_attack_model(model, model_path, device):
    print("load model from:", model_path)
    state_dict = th.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    return model
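
# A hedged round-trip sketch for the two helpers above. MLP_ATTACK() is assumed
# to take no constructor arguments (as in run_attack_one_feature below); any
# nn.Module would work the same way:
#
#     model = MLP_ATTACK()
#     path = save_attack_model(args, model)                     # returns the checkpoint path
#     restored = load_attack_model(MLP_ATTACK(), path, th.device('cpu'))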

def test_features(args, epoch, model, test_dataloader, num_features, stat_dict=None):
    device = args.device
    test_acc, correct, total, scores, targets = 0.0, 0, 0, [], []
    stat_dict = stat_dict or {}
    model.eval()

    with th.no_grad():
        for data in test_dataloader:
            inputs = [x.to(device) for x in data[:-1]]
            label = data[-1].to(device)

            outputs = model(*inputs)
            posteriors = F.softmax(outputs, dim=1)
            _, predicted = posteriors.max(1)
            total += label.size(0)
            correct += predicted.eq(label).sum().item()

            # On the final epoch, record each sample's attack posterior, keyed by node pair
            if epoch == args.num_epochs - 1 and not args.diff:
                for key, posterior in zip(data[0], posteriors):
                    stat_dict[tuple(key.cpu().numpy())][f'{args.method}_attack_posterior'] = posterior.cpu().numpy()

            targets.extend(label.cpu().numpy().tolist())
            scores.extend([p.cpu().numpy()[1] for p in posteriors])  # positive-class scores

    test_acc = correct / total
    test_auc = roc_auc_score(targets, scores)
    print(f'Test Acc: {100. * test_acc:.3f}% ({correct}/{total}) AUC Score: {test_auc:.3f}')

    return test_acc, test_auc, stat_dict

def run_attack(args, in_dim, train_dataloader, test_dataloader, stat_dict, model_class, model_args=()):
    epoch, device = args.num_epochs, args.device
    model = model_class(*model_args).to(device)
    model.apply(_weights_init_normal)
    loss_fcn = nn.CrossEntropyLoss().to(device)
    optimizer = th.optim.Adam(model.parameters(), lr=args.lr) if args.optim == 'adam' else th.optim.SGD(model.parameters(), lr=args.lr)
    scheduler = CosineAnnealingLR(optimizer, T_max=epoch, eta_min=0)
    train_acc = 0.0

    for e in range(epoch):
        correct, total, targets, scores = 0, 0, [], []
        model.train()

        for _, *features, label in train_dataloader:
            optimizer.zero_grad()
            features = [f.to(device) for f in features]
            label = label.to(device)

            outputs = model(*features)
            posteriors = F.softmax(outputs, dim=1)
            # CrossEntropyLoss applies log-softmax internally, so it takes the raw
            # logits; the softmaxed posteriors are kept only for accuracy and AUC
            loss = loss_fcn(outputs, label)
            loss.backward()
            optimizer.step()

            _, predicted = posteriors.max(1)
            total += label.size(0)
            correct += predicted.eq(label).sum().item()
            targets.extend(label.cpu().detach().numpy().tolist())
            scores.extend([p.cpu().detach().numpy()[1] for p in posteriors])

        if args.scheduler:
            scheduler.step()

        train_acc = correct / total
        train_auc = roc_auc_score(targets, scores)
        print(f'[Epoch {e}] Train Acc: {100. * train_acc:.3f}% ({correct}/{total}) AUC Score: {train_auc:.3f}')

        if e == epoch - 1:
            # Final epoch: populate stat_dict and persist the trained attack model
            test_acc, test_auc, stat_dict = test_features(args, e, model, test_dataloader, len(model_args), stat_dict)
            save_attack_model(args, model)
        else:
            test_acc, test_auc, _ = test_features(args, e, model, test_dataloader, len(model_args))

    return model, train_acc, train_auc, test_acc, test_auc, stat_dict

# Wrappers for running the attack with different feature combinations
def run_attack_one_feature(args, in_dim, train_dataloader, test_dataloader, stat_dict):
    return run_attack(args, in_dim, train_dataloader, test_dataloader, stat_dict, MLP_ATTACK)

def run_attack_two_features(args, posterior_feature_dim, train_dataloader, test_dataloader, stat_dict):
    model_class = MLP_ATTACK_PLUS2 if args.feature in ('posteriors_graph', 'label_graph') else MLP_ATTACK_PLUS
    aux_dim = args.graph_feature_dim if model_class == MLP_ATTACK_PLUS2 else args.node_feature_dim
    return run_attack(args, posterior_feature_dim, train_dataloader, test_dataloader, stat_dict, model_class, (aux_dim, posterior_feature_dim))

def run_attack_three_features(args, posterior_feature_dim, train_dataloader, test_dataloader, stat_dict):
    return run_attack(args, posterior_feature_dim, train_dataloader, test_dataloader, stat_dict, MLP_ATTACK_ALL, (args.node_feature_dim, posterior_feature_dim, args.graph_feature_dim))
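
For orientation, a minimal sketch of driving one of these runners; the `args` fields mirror those read inside `run_attack`, while the toy dataset layout (a node-pair key, one feature tensor, and a binary membership label per sample) is an assumption, not the repository's actual loader:

import torch as th
from types import SimpleNamespace
from torch.utils.data import DataLoader, TensorDataset

keys = th.arange(256).unsqueeze(1)        # stand-in node-pair keys
feats = th.randn(256, 8)                  # hypothetical 8-dim posterior features
labels = th.randint(0, 2, (256,))         # binary membership labels
loader = DataLoader(TensorDataset(keys, feats, labels), batch_size=32)

args = SimpleNamespace(device='cpu', lr=1e-3, optim='adam', scheduler=True,
                       num_epochs=5, diff=True, method='baseline')
# run_attack_one_feature(args, 8, loader, loader, {})
# (left commented: MLP_ATTACK's input width and the save-path fields used by
# save_attack_model depend on the repository's configuration)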
123 changes: 123 additions & 0 deletions new_code/attack/target.py
@@ -0,0 +1,123 @@
import os
import dgl
import time
import numpy as np
import torch as th
import torch.nn.functional as F
import torch.optim as optim
import torch.nn as nn
th.manual_seed(1)

from utils.load_model import get_gnn_model
from utils.model_evaluation import compute_accuracy, evaluate_model

def get_dataloader(train_graph, args):
    # Set up the data loader for training
    train_node_ids = th.arange(train_graph.num_nodes())
    sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2)  # sample full 2-hop neighborhoods
    return dgl.dataloading.DataLoader(
        train_graph,
        train_node_ids,
        sampler,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=False,
        num_workers=args.num_workers
    )

def initialize_model_and_optimizer(args):
    # Initialize model, loss function, and optimizer
    model = get_gnn_model(args).to(args.device)
    loss_function = nn.CrossEntropyLoss().to(args.device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    return model, loss_function, optimizer

def training_step(model, blocks, batch_inputs, batch_labels, loss_function, optimizer, args):
    # One training step: forward pass, loss computation, and backpropagation
    blocks = [block.int().to(args.device) for block in blocks]
    batch_pred = model(blocks, batch_inputs)  # raw logits from the model
    loss = loss_function(batch_pred, batch_labels)  # CrossEntropyLoss expects logits, not softmaxed scores
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    batch_pred = F.softmax(batch_pred, dim=1)  # softmax only for accuracy reporting
    return loss, batch_pred

def log_info(epoch, step, loss, batch_pred, batch_labels, iter_tput):
    # Print training details: loss, accuracy, and throughput
    acc = compute_accuracy(batch_pred, batch_labels)
    print(f'Epoch {epoch:05d} | Step {step:05d} | Loss {loss.item():.4f} | '
          f'Train Acc {acc.item():.4f} | Speed (samples/sec) {np.mean(iter_tput[3:]):.4f}')

def evaluate_and_log(model, train_graph, test_graph, train_node_ids, test_node_ids, args):
    # Evaluate the model on the train and test graphs
    train_acc, _ = evaluate_model(model, train_graph, train_graph.ndata['features'],
                                  train_graph.ndata['labels'], train_node_ids, args.device)
    print(f'Train Acc: {train_acc:.4f}')

    test_acc, _ = evaluate_model(model, test_graph, test_graph.ndata['features'],
                                 test_graph.ndata['labels'], test_node_ids, args.device)
    print(f'Test Acc: {test_acc:.4f}')
    return train_acc, test_acc

def save_trained_model(model, args):
    # Save the trained model to disk
    if not os.path.exists(args.model_save_path):
        os.makedirs(args.model_save_path)
    model_save_path = os.path.join(
        args.model_save_path,
        f'{args.setting}_{args.dataset}_{args.model}_{args.mode}'
        f'_{args.prop if args.prop else ""}.pth'
    )
    th.save(model.state_dict(), model_save_path)
    print(f"Training complete, model saved to {model_save_path}")

def run_gnn(args, data):
    train_graph, test_graph = data
    train_node_ids = th.arange(train_graph.num_nodes())
    test_node_ids = th.arange(test_graph.num_nodes())

    # Initialize DataLoader for training
    dataloader = get_dataloader(train_graph, args)

    # Initialize model, loss function, and optimizer
    model, loss_function, optimizer = initialize_model_and_optimizer(args)

    iter_tput = []  # per-step throughput samples
    avg_epoch_time = 0  # accumulated epoch time after warm-up

    # Training loop
    for epoch in range(args.num_epochs):
        epoch_start_time = time.time()
        tic_step = time.time()  # separate per-step clock, so the epoch timer is never reset mid-epoch

        # Loop through batches
        for step, (_, seeds, blocks) in enumerate(dataloader):
            batch_inputs = blocks[0].srcdata['features']
            batch_labels = blocks[-1].dstdata['labels'].to(device=args.device, dtype=th.long)

            # Perform a training step (forward, backward, and optimization)
            loss, batch_pred = training_step(model, blocks, batch_inputs, batch_labels, loss_function, optimizer, args)

            # Track throughput and log training info every few steps
            iter_tput.append(len(seeds) / (time.time() - tic_step))
            if step % args.log_every == 0:
                log_info(epoch, step, loss, batch_pred, batch_labels, iter_tput)

            tic_step = time.time()

        # Log time taken for the whole epoch
        epoch_end_time = time.time()
        print(f'Epoch {epoch}, Time(s): {epoch_end_time - epoch_start_time:.4f}')

        # Accumulate epoch time after the first few warm-up epochs
        if epoch >= 5:
            avg_epoch_time += epoch_end_time - epoch_start_time

        # Evaluate the model periodically
        if epoch % args.eval_every == 0 and epoch != 0:
            evaluate_and_log(model, train_graph, test_graph, train_node_ids, test_node_ids, args)

    # Save the trained model
    save_trained_model(model, args)

    # Final evaluation (run once, returning the train/test accuracies)
    return evaluate_and_log(model, train_graph, test_graph, train_node_ids, test_node_ids, args)
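
A hedged usage sketch for `run_gnn`: the graphs carry the 'features' and 'labels' node data the loop above reads, while everything consumed by `get_gnn_model` and `save_trained_model` (model choice, save paths) is assumed to be set on `args`:

import dgl
import torch as th

g = dgl.rand_graph(100, 400)                  # toy graph: 100 nodes, 400 edges
g.ndata['features'] = th.randn(100, 16)
g.ndata['labels'] = th.randint(0, 3, (100,))
# run_gnn(args, (g, g))   # train/test on the same toy graph, for illustration only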
4 changes: 4 additions & 0 deletions new_code/load_data/__init__.py
@@ -0,0 +1,4 @@
from .load_graph import *
from .inductive_split import *
from .generate_xy import *
from .batch_data import *
113 changes: 113 additions & 0 deletions new_code/load_data/batch_data.py
@@ -0,0 +1,113 @@
import dgl
import networkx as nx
from utils.model_query import query_trained_model
from load_data.generate_xy import generate_features


def get_batch(args, batch_pairs, g, k, mode):
    """
    Generates a batch of subgraphs and queries posteriors for node pairs.

    Args:
        args: Arguments for the model and batch generation.
        batch_pairs: List of node pairs for which the batch is generated.
        g: The graph object (DGL graph).
        k: The number of hops to consider for subgraphs.
        mode: Mode for querying the trained model.

    Returns:
        posteriors_dict_batch: A dictionary of posteriors.
        index_mapping_dict_batch: A dictionary of index mappings for nodes.
    """
    query_graph_batch, index_mapping_dict_batch = get_khop_query_graph_batch(g, batch_pairs, k)
    index_update_batch = [node for nodes in index_mapping_dict_batch.values() for node in nodes]
    posteriors_dict_batch = query_trained_model(args, index_update_batch, query_graph_batch, mode)

    print('Finished generating posteriors and mapping dict...')
    return posteriors_dict_batch, index_mapping_dict_batch
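
# Hedged example call: query posteriors for two candidate edges using 2-hop
# subgraphs. The node-pair IDs and the mode string 'target' are assumed values;
# mode is simply forwarded to query_trained_model.
#
#     posteriors, mapping = get_batch(args, [(0, 5), (3, 9)], g, k=2, mode='target')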


def generate_batch_features(args, batch_pairs, g, k, mode, feature_type):
    """
    A generic function to generate features for different types of graph-based batches.

    Args:
        args: Arguments for the model and batch generation.
        batch_pairs: List of node pairs for which the batch is generated.
        g: The graph object (DGL graph).
        k: The number of hops to consider for subgraphs.
        mode: Mode for querying the trained model.
        feature_type: The type of features to generate ('node', 'graph', or 'all').

    Returns:
        Features, labels, and statistical data for the batch.
    """
    posteriors_dict_batch, index_mapping_dict_batch = get_batch(args, batch_pairs, g, k, mode)

    if feature_type == 'node':
        return generate_features(args, g, batch_pairs, posteriors_dict_batch, index_mapping_dict_batch)
    elif feature_type in ('graph', 'all'):  # identical call for both types in this revision
        return generate_features(args, g, batch_pairs, posteriors_dict_batch, mode, index_mapping_dict_batch)
    else:
        return generate_features(args, batch_pairs, posteriors_dict_batch, index_mapping_dict_batch)


def get_khop_query_graph_batch(g, pairs, k=2):
    """
    Generates a k-hop subgraph for each node pair and returns a batched graph.

    Args:
        g: The graph object (DGL graph).
        pairs: List of node pairs for which k-hop neighborhoods are generated.
        k: The number of hops to consider for subgraphs.

    Returns:
        A batched graph containing the k-hop subgraphs and a mapping of node indices.
    """
    nx_g = dgl.to_networkx(g, node_attrs=["features"])
    subgraph_list = []
    index_mapping_dict = {}
    bias = 0

    for pair in pairs:
        start_node, end_node = pair
        # Temporarily drop the candidate edge so it cannot leak into the query
        nx_g.remove_edges_from([(start_node, end_node), (end_node, start_node)])

        node_index = []
        for node in (start_node, end_node):
            node_neighbors = list(nx.ego.ego_graph(nx_g, n=node, radius=k).nodes())
            node_new_index = node_neighbors.index(node)
            subgraph_k_hop = g.subgraph(node_neighbors)
            subgraph_list.append(subgraph_k_hop)
            node_index.append(node_new_index + bias)
            bias += len(node_neighbors)

        nx_g.add_edges_from([(start_node, end_node), (end_node, start_node)])
        index_mapping_dict[(start_node, end_node)] = (node_index[0], node_index[1])

    update_query_graph = dgl.batch(subgraph_list)
    print("Got k-hop query graph")
    return update_query_graph, index_mapping_dict
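
# Worked illustration of the bias bookkeeping above (hypothetical sizes):
# for pair (a, b), if a's k-hop subgraph has 5 nodes with a at local index 2,
# a maps to batched index 2 (bias 0); if b's subgraph then has 7 nodes with b
# at local index 0, b maps to 0 + 5 = 5. The next pair's subgraphs start at
# bias 12, because dgl.batch concatenates the subgraphs' nodes in order.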


# Wrapper functions for specific batch feature types
def get_batch_posteriors(args, batch_pairs, g, k, mode):
    batch_features, batch_labels, batch_stat_dict = generate_batch_features(args, batch_pairs, g, k, mode, 'default')
    return batch_features, batch_labels, batch_stat_dict


def get_batch_posteriors_node(args, batch_pairs, g, k, mode):
    batch_node_features, batch_posteriors_features, batch_labels, batch_stat_dict = generate_batch_features(args, batch_pairs, g, k, mode, 'node')
    return batch_node_features, batch_posteriors_features, batch_labels, batch_stat_dict


def get_batch_posteriors_graph(args, batch_pairs, g, k, mode):
    batch_posteriors_features, batch_graph_features, batch_labels, batch_stat_dict = generate_batch_features(args, batch_pairs, g, k, mode, 'graph')
    return batch_posteriors_features, batch_graph_features, batch_labels, batch_stat_dict


def get_batch_posteriors_node_graph(args, batch_pairs, g, k, mode):
    batch_node_features, batch_posteriors_features, batch_graph_features, batch_labels, batch_stat_dict = generate_batch_features(args, batch_pairs, g, k, mode, 'all')
    return batch_node_features, batch_posteriors_features, batch_graph_features, batch_labels, batch_stat_dict
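
A plausible pairing of these wrappers with the runners in new_code/attack/attack.py, inferred from the feature combinations rather than stated anywhere in the diff:

# get_batch_posteriors            -> run_attack_one_feature    (posteriors only)
# get_batch_posteriors_node       -> run_attack_two_features   (node + posterior features)
# get_batch_posteriors_graph      -> run_attack_two_features   (posterior + graph features)
# get_batch_posteriors_node_graph -> run_attack_three_features (all three)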