From 64b9010792a198888fb9f57b13c87939d8db088b Mon Sep 17 00:00:00 2001
From: Iqra171
Date: Fri, 19 Sep 2025 21:29:56 +0500
Subject: [PATCH 01/22] Create custom_attack.py

---
 pygip/src/custom_attack.py | 207 +++++++++++++++++++++++++++++++++++++
 1 file changed, 207 insertions(+)
 create mode 100644 pygip/src/custom_attack.py

diff --git a/pygip/src/custom_attack.py b/pygip/src/custom_attack.py
new file mode 100644
index 0000000..1c2d7d3
--- /dev/null
+++ b/pygip/src/custom_attack.py
@@ -0,0 +1,207 @@
+# src/custom_attack.py
+import os
+import random
+from typing import Optional, Union
+
+import torch
+import torch.nn.functional as F
+
+from src.dataset import Dataset
+from src.attacks import BaseAttack
+from src.models import GraphSAGE, GCN
+from src.train_target import train_masked_target
+
+
+def evaluate_model(model: torch.nn.Module, data, device: torch.device):
+    model.eval()
+    data = data.to(device)
+    with torch.no_grad():
+        logits = model(data.x, data.edge_index)
+        preds = logits.argmax(dim=1)
+        # `mask_a or mask_b` is ambiguous for multi-element tensors; use getattr fallbacks
+        mask = getattr(data, "test_mask", getattr(data, "val_mask", None))
+        if mask is None:
+            mask = torch.ones(data.num_nodes, dtype=torch.bool, device=device)
+        return (preds[mask] == data.y[mask]).float().mean().item()
+
+
+class FeatureFlipAttack(BaseAttack):
+    """
+    Custom attack that perturbs node features for a fraction of nodes.
+    Conforms to the PyGIP BaseAttack API:
+      - attack()
+      - _load_model()
+      - _train_target_model()
+      - _train_attack_model()
+    """
+
+    supported_api_types = {"pyg"}
+    supported_datasets = set()
+
+    def __init__(self, dataset: Dataset, attack_node_fraction: float, model_path: Optional[str] = None,
+                 device: Optional[Union[str, torch.device]] = None):
+        # must call super() so BaseAttack sets self.device and graph fields
+        super().__init__(dataset, attack_node_fraction, model_path, device)
+
+        if not (0.0 < self.attack_node_fraction <= 1.0):
+            raise ValueError("attack_node_fraction must be in (0,1].")
+
+    def _seed(self, seed: int = 0):
+        random.seed(seed)
+        torch.manual_seed(seed)
+        if torch.cuda.is_available():
+            torch.cuda.manual_seed_all(seed)
+
+    def _load_model(self):
+        """
+        Try to load a model checkpoint at self.model_path.
+        Expected checkpoint formats:
+          - a bare state_dict, or
+          - a dict with 'state_dict' / 'model_state' and optional 'model_type' metadata
+        Returns a model instance moved to self.device, or None if no model_path was provided.
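+        Illustrative layouts (keys exactly as probed below):
+            {"state_dict": sd, "model_type": "GraphSAGE", "hidden": 64}
+            {"model_state": sd}  # model_type then inferred from weight key names
+            sd                   # a bare state_dict also works; hidden defaults to 64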
+ """ + if not self.model_path: + return None + if not os.path.exists(self.model_path): + print(f"[FeatureFlipAttack] model_path {self.model_path} not found.") + return None + + checkpoint = torch.load(self.model_path, map_location=self.device) + state = checkpoint.get("state_dict", checkpoint) if isinstance(checkpoint, dict) else checkpoint + model_type = checkpoint.get("model_type", None) if isinstance(checkpoint, dict) else None + + # minimal heuristic for model_type + if model_type is None and isinstance(state, dict): + keys = list(state.keys()) + if any("lin_l" in k or "lin_r" in k for k in keys): + model_type = "GraphSAGE" + else: + model_type = "GCN" + + # instantiate appropriate model + in_dim = self.num_features + out_dim = self.num_classes + hid = checkpoint.get("hidden", 64) if isinstance(checkpoint, dict) else 64 + + if model_type == "GraphSAGE": + model = GraphSAGE(in_dim, out_dim, hidden=hid).to(self.device) + else: + model = GCN(in_dim, out_dim, hidden=hid).to(self.device) + + try: + # try multiple common keys + if "state_dict" in checkpoint: + state_dict = checkpoint["state_dict"] + elif "model_state" in checkpoint: + state_dict = checkpoint["model_state"] + else: + state_dict = state + model.load_state_dict(state_dict, strict=False) + model.eval() + return model + except Exception as e: + print(f"[FeatureFlipAttack] Failed to load weights: {e}") + return None + + def _train_target_model(self, train_epochs: int = 100, lr: float = 1e-2, seed: int = 0): + """ + Train a victim model (GraphSAGE) on the clean dataset and return the trained model. + Uses self.graph_data and self.device. + """ + self._seed(seed) + data = self.graph_data.to(self.device) + model = GraphSAGE(self.num_features, self.num_classes, hidden=64).to(self.device) + opt = torch.optim.Adam(model.parameters(), lr=lr) + + model.train() + for _ in range(train_epochs): + opt.zero_grad() + logits = model(data.x, data.edge_index) + loss = F.cross_entropy(logits[data.train_mask], data.y[data.train_mask]) + loss.backward() + opt.step() + + model.eval() + return model + + def _train_attack_model(self, dataset_name: str = "Cora", mask_ratio: float = 0.12, + hidden: int = 64, epochs: int = 200, seed: int = 0): + """ + Train a surrogate/attack model using the existing train_masked_target routine. + Returns path to checkpoint saved by train_masked_target. + """ + # train_masked_target will handle device selection via its arguments + ckpt_path = train_masked_target(dataset_name=dataset_name, + mask_ratio=mask_ratio, + hidden=hidden, + epochs=epochs, + seed=seed, + device=self.device) + return ckpt_path + + def _perturb_features(self, data, fraction: float): + """Return a clone of data with fraction of node features replaced by Gaussian noise.""" + pert = data.clone() + num_nodes = pert.num_nodes + k = max(1, int(fraction * num_nodes)) + idx = torch.randperm(num_nodes)[:k] + noise = torch.randn((k, pert.x.size(1)), device=pert.x.device) * 0.5 + pert.x[idx] = noise + return pert, k + + def attack(self, retrain_target: bool = True, retrain_epochs: int = 50, seed: int = 0): + """ + Main attack logic: + 1) Load or train a target model + 2) Evaluate baseline + 3) Perturb features on a fraction of nodes + 4) Optionally retrain a model on perturbed data and evaluate + Returns dict with metrics. 
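+        Example (mirrors examples/run_custom_attack.py; `dataset` is a pyg-backed Dataset):
+            >>> atk = FeatureFlipAttack(dataset, attack_node_fraction=0.25)
+            >>> results = atk.attack(retrain_target=True, retrain_epochs=50, seed=0)
+            >>> results["acc_before"], results["acc_after"]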
+        """
+        self._seed(seed)
+
+        if self.graph_data is None:
+            raise RuntimeError("No graph_data available in dataset.")
+
+        # Step 1: get target model
+        model = self._load_model() or (self._train_target_model(train_epochs=retrain_epochs, seed=seed) if retrain_target else None)
+        if model is None:
+            model = self._train_target_model(train_epochs=retrain_epochs, seed=seed)
+
+        # Step 2: eval before
+        acc_before = evaluate_model(model, self.graph_data, self.device)
+
+        # Step 3: perturb features
+        perturbed_data, num_perturbed = self._perturb_features(self.graph_data, self.attack_node_fraction)
+
+        # Step 4: retrain and evaluate on perturbed graph
+        model_pert = GraphSAGE(self.num_features, self.num_classes, hidden=64).to(self.device)
+        opt = torch.optim.Adam(model_pert.parameters(), lr=1e-2)
+        perturbed_data = perturbed_data.to(self.device)
+
+        model_pert.train()
+        for _ in range(retrain_epochs):
+            opt.zero_grad()
+            logits = model_pert(perturbed_data.x, perturbed_data.edge_index)
+            loss = F.cross_entropy(logits[perturbed_data.train_mask], perturbed_data.y[perturbed_data.train_mask])
+            loss.backward()
+            opt.step()
+
+        acc_after = evaluate_model(model_pert, perturbed_data, self.device)
+
+        results = {
+            "attack_name": "FeatureFlipAttack",
+            "attack_fraction": self.attack_node_fraction,
+            "num_perturbed": num_perturbed,
+            "acc_before": acc_before,
+            "acc_after": acc_after,
+        }
+        return results

From 9979d4044985202e6e20f6a7de80b47689392e9a Mon Sep 17 00:00:00 2001
From: Iqra171
Date: Fri, 19 Sep 2025 21:32:43 +0500
Subject: [PATCH 02/22] Create custom_defense.py

---
 pygip/src/custom_defense.py | 178 ++++++++++++++++++++++++++++++++++++
 1 file changed, 178 insertions(+)
 create mode 100644 pygip/src/custom_defense.py

diff --git a/pygip/src/custom_defense.py b/pygip/src/custom_defense.py
new file mode 100644
index 0000000..46dca0e
--- /dev/null
+++ b/pygip/src/custom_defense.py
@@ -0,0 +1,178 @@
+# src/custom_defense.py
+import os
+from typing import Optional
+
+import torch
+import torch.nn.functional as F
+
+from src.dataset import Dataset
+from src.defenses import BaseDefense
+from src.models import GraphSAGE, GCN
+from src.train_target import train_masked_target
+
+
+def evaluate_model(model: torch.nn.Module, data, device: torch.device):
+    model.eval()
+    data = data.to(device)
+    with torch.no_grad():
+        logits = model(data.x, data.edge_index)
+        preds = logits.argmax(dim=1)
+        # `mask_a or mask_b` is ambiguous for multi-element tensors; use getattr fallbacks
+        mask = getattr(data, "test_mask", getattr(data, "val_mask", None))
+        if mask is None:
+            mask = torch.ones(data.num_nodes, dtype=torch.bool, device=device)
+        return (preds[mask] == data.y[mask]).float().mean().item()
+
+
+class NeighborSmoothingDefense(BaseDefense):
+    """
+    Defense that smooths features by neighbor averaging and retrains a model.
+    Implements required BaseDefense hooks:
+      - defend()
+      - _load_model()
+      - _train_target_model()
+      - _train_defense_model()
+      - _train_surrogate_model()
+    """
+
+    supported_api_types = {"pyg"}
+    supported_datasets = set()
+
+    def __init__(self, dataset: Dataset, attack_node_fraction: float, device: Optional[torch.device] = None):
+        super().__init__(dataset, attack_node_fraction, device)
+
+    @staticmethod
+    def smooth_features(data):
+        row, col = data.edge_index
+        acc = torch.zeros_like(data.x)
+        deg = torch.zeros(data.num_nodes, device=data.x.device)
+        acc.index_add_(0, row, data.x[col])
+        deg.index_add_(0, row, torch.ones(col.size(0), device=data.x.device))
+        deg = deg.clamp(min=1.0).unsqueeze(1)
+        return acc / deg
+
+    def _load_model(self):
+        """
+        Attempt to load a model from self.model_path (optional).
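+        Accepts the same illustrative layouts as FeatureFlipAttack._load_model, e.g.:
+            {"state_dict": sd, "model_type": "GraphSAGE", "hidden": 64}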
+        Mirrors _load_model style from attack.
+        """
+        if not getattr(self, "model_path", None):
+            return None
+        if not os.path.exists(self.model_path):
+            print(f"[NeighborSmoothingDefense] model_path {self.model_path} not found.")
+            return None
+
+        checkpoint = torch.load(self.model_path, map_location=self.device)
+        state = checkpoint.get("state_dict", checkpoint) if isinstance(checkpoint, dict) else checkpoint
+        model_type = checkpoint.get("model_type", None) if isinstance(checkpoint, dict) else None
+
+        if model_type is None and isinstance(state, dict):
+            keys = list(state.keys())
+            if any("lin_l" in k or "lin_r" in k for k in keys):
+                model_type = "GraphSAGE"
+            else:
+                model_type = "GCN"
+
+        in_dim = self.num_features
+        out_dim = self.num_classes
+        hid = checkpoint.get("hidden", 64) if isinstance(checkpoint, dict) else 64
+
+        if model_type == "GraphSAGE":
+            model = GraphSAGE(in_dim, out_dim, hidden=hid).to(self.device)
+        else:
+            model = GCN(in_dim, out_dim, hidden=hid).to(self.device)
+
+        try:
+            if "state_dict" in checkpoint:
+                state_dict = checkpoint["state_dict"]
+            elif "model_state" in checkpoint:
+                state_dict = checkpoint["model_state"]
+            else:
+                state_dict = state
+            model.load_state_dict(state_dict, strict=False)
+            model.eval()
+            return model
+        except Exception as e:
+            print(f"[NeighborSmoothingDefense] load failed: {e}")
+            return None
+
+    def _train_target_model(self, data, epochs: int = 50, lr: float = 1e-2, seed: int = 0):
+        """
+        Train a standard target model on the provided data and return it.
+        This matches the framework hook signature: accepts data and returns model.
+        """
+        torch.manual_seed(seed)
+        data = data.to(self.device)
+        model = GraphSAGE(self.num_features, self.num_classes, hidden=64).to(self.device)
+        opt = torch.optim.Adam(model.parameters(), lr=lr)
+
+        model.train()
+        for _ in range(epochs):
+            opt.zero_grad()
+            logits = model(data.x, data.edge_index)
+            loss = F.cross_entropy(logits[data.train_mask], data.y[data.train_mask])
+            loss.backward()
+            opt.step()
+
+        model.eval()
+        return model
+
+    def _train_defense_model(self, data, epochs: int = 50, lr: float = 1e-2, seed: int = 0):
+        """
+        Train model using defense-prepared data (e.g., smoothed features).
+        """
+        # Behavior mirrors _train_target_model; kept separate for clarity
+        return self._train_target_model(data, epochs=epochs, lr=lr, seed=seed)
+
+    def _train_surrogate_model(self, dataset_name: str = "Cora", mask_ratio: float = 0.12,
+                               hidden: int = 64, epochs: int = 200, seed: int = 0):
+        """
+        Train a surrogate (attacker's) model using the provided train_masked_target helper.
+        Returns path to saved checkpoint.
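+        Example (illustrative):
+            >>> ckpt = defense._train_surrogate_model("Cora", mask_ratio=0.12, epochs=200)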
+ """ + ckpt_path = train_masked_target(dataset_name=dataset_name, + mask_ratio=mask_ratio, + hidden=hidden, + epochs=epochs, + seed=seed, + device=self.device) + return ckpt_path + + def defend(self, retrain_epochs: int = 50, seed: int = 0): + """ + Defense workflow: + 1) Train baseline target + 2) Train surrogate (optional) - returns ckpt path for analysis + 3) Apply smoothing and train defense model + 4) Return metrics dictionary + """ + if self.graph_data is None: + raise RuntimeError("No graph_data available in dataset.") + + # Baseline target (trained on original data) + baseline_model = self._train_target_model(self.graph_data, epochs=retrain_epochs, seed=seed) + acc_baseline = evaluate_model(baseline_model, self.graph_data, self.device) + + # Optionally train surrogate for evaluation/debug (not used directly here) + # surrogate_ckpt = self._train_surrogate_model(dataset_name=self.dataset.dataset_name, + # mask_ratio=0.12, hidden=64, epochs=200, seed=seed) + + # Apply smoothing to features + smoothed = self.graph_data.clone() + smoothed.x = self.smooth_features(smoothed) + + # Train defense model on smoothed features + defense_model = self._train_defense_model(smoothed, epochs=max(10, retrain_epochs // 2), seed=seed) + acc_defended = evaluate_model(defense_model, smoothed, self.device) + + results = { + "defense_name": "NeighborSmoothingDefense", + "acc_baseline": acc_baseline, + "acc_defended": acc_defended, + "attack_node_fraction": self.attack_node_fraction, + } + return results From 90fc322c9a1468bc920f25647f220b028268a24f Mon Sep 17 00:00:00 2001 From: Iqra171 Date: Fri, 19 Sep 2025 21:38:18 +0500 Subject: [PATCH 03/22] Create run_custom_attack.py --- examples/run_custom_attack.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 examples/run_custom_attack.py diff --git a/examples/run_custom_attack.py b/examples/run_custom_attack.py new file mode 100644 index 0000000..d919ff1 --- /dev/null +++ b/examples/run_custom_attack.py @@ -0,0 +1,19 @@ +# examples/run_custom_attack.py +import argparse +from pygip.datasets.datasets import Dataset +from src.custom_attack import FeatureFlipAttack + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--dataset", type=str, default="Cora") + parser.add_argument("--fraction", type=float, default=0.1) + parser.add_argument("--device", type=str, default=None) + args = parser.parse_args() + + dataset = Dataset(api_type="pyg", path="./data") + attack = FeatureFlipAttack(dataset, attack_node_fraction=args.fraction, model_path=None, device=args.device) + results = attack.attack(retrain_target=True, retrain_epochs=50, seed=0) + print("Attack results:", results) + +if __name__ == "__main__": + main() From ff82d3dbb2a9955f4414894b8a58adbb375acc06 Mon Sep 17 00:00:00 2001 From: Iqra171 Date: Fri, 19 Sep 2025 21:41:42 +0500 Subject: [PATCH 04/22] Create run_custom_defense.py --- examples/run_custom_defense.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 examples/run_custom_defense.py diff --git a/examples/run_custom_defense.py b/examples/run_custom_defense.py new file mode 100644 index 0000000..237fc4d --- /dev/null +++ b/examples/run_custom_defense.py @@ -0,0 +1,19 @@ +# examples/run_custom_defense.py +import argparse +from pygip.datasets.datasets import Dataset +from src.custom_defense import NeighborSmoothingDefense + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--dataset", type=str, default="Cora") + parser.add_argument("--fraction", type=float, 
default=0.1) + parser.add_argument("--device", type=str, default=None) + args = parser.parse_args() + + dataset = Dataset(api_type="pyg", path="./data") + defense = NeighborSmoothingDefense(dataset, attack_node_fraction=args.fraction, device=args.device) + results = defense.defend(retrain_epochs=50, seed=0) + print("Defense results:", results) + +if __name__ == "__main__": + main() From 5fce2df7008f28f72e382c2f6263dfa0dd6cd16a Mon Sep 17 00:00:00 2001 From: Iqra171 Date: Sat, 20 Sep 2025 00:15:53 +0500 Subject: [PATCH 05/22] Update run_custom_attack.py --- examples/run_custom_attack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/run_custom_attack.py b/examples/run_custom_attack.py index d919ff1..a51d149 100644 --- a/examples/run_custom_attack.py +++ b/examples/run_custom_attack.py @@ -1,7 +1,7 @@ # examples/run_custom_attack.py import argparse from pygip.datasets.datasets import Dataset -from src.custom_attack import FeatureFlipAttack +from pygip.src.custom_attack import FeatureFlipAttack def main(): parser = argparse.ArgumentParser() From 4b9ca871d2c5fa09143d28abd32e2457425aa737 Mon Sep 17 00:00:00 2001 From: Iqra171 Date: Sat, 20 Sep 2025 00:39:20 +0500 Subject: [PATCH 06/22] Update run_custom_attack.py --- examples/run_custom_attack.py | 47 ++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/examples/run_custom_attack.py b/examples/run_custom_attack.py index a51d149..52088f5 100644 --- a/examples/run_custom_attack.py +++ b/examples/run_custom_attack.py @@ -1,19 +1,36 @@ -# examples/run_custom_attack.py -import argparse +# # examples/run_custom_attack.py +# import argparse +# from pygip.datasets.datasets import Dataset +# from pygip.src.custom_attack import FeatureFlipAttack + +# def main(): +# parser = argparse.ArgumentParser() +# parser.add_argument("--dataset", type=str, default="Cora") +# parser.add_argument("--fraction", type=float, default=0.1) +# parser.add_argument("--device", type=str, default=None) +# args = parser.parse_args() + +# dataset = Dataset(api_type="pyg", path="./data") +# attack = FeatureFlipAttack(dataset, attack_node_fraction=args.fraction, model_path=None, device=args.device) +# results = attack.attack(retrain_target=True, retrain_epochs=50, seed=0) +# print("Attack results:", results) + +# if __name__ == "__main__": +# main() +import sys +sys.path.insert(0, '/content/PyGIP') + +# Import the modules from pygip.datasets.datasets import Dataset from pygip.src.custom_attack import FeatureFlipAttack -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--dataset", type=str, default="Cora") - parser.add_argument("--fraction", type=float, default=0.1) - parser.add_argument("--device", type=str, default=None) - args = parser.parse_args() - - dataset = Dataset(api_type="pyg", path="./data") - attack = FeatureFlipAttack(dataset, attack_node_fraction=args.fraction, model_path=None, device=args.device) - results = attack.attack(retrain_target=True, retrain_epochs=50, seed=0) - print("Attack results:", results) +# Set parameters directly instead of using argparse +dataset_name = "Cora" +fraction = 0.25 +device = None -if __name__ == "__main__": - main() +# Run the attack +dataset = Dataset(api_type="pyg", path="./data") +attack = FeatureFlipAttack(dataset, attack_node_fraction=fraction, model_path=None, device=device) +results = attack.attack(retrain_target=True, retrain_epochs=50, seed=0) +print("Attack results:", results) From ca5d8e463c9881a37cface3755d570ea5ea65ce4 Mon Sep 17 
00:00:00 2001 From: Iqra171 Date: Wed, 8 Oct 2025 17:32:51 +0500 Subject: [PATCH 07/22] Add files via upload --- examples/adversial.py | 240 +++++++++++++++++++++++++++++ examples/adversial_table8.py | 177 +++++++++++++++++++++ examples/run_bboxve.py | 164 ++++++++++++++++++++ examples/run_bgrove.py | 208 +++++++++++++++++++++++++ examples/run_table5.py | 288 +++++++++++++++++++++++++++++++++++ 5 files changed, 1077 insertions(+) create mode 100644 examples/adversial.py create mode 100644 examples/adversial_table8.py create mode 100644 examples/run_bboxve.py create mode 100644 examples/run_bgrove.py create mode 100644 examples/run_table5.py diff --git a/examples/adversial.py b/examples/adversial.py new file mode 100644 index 0000000..7099256 --- /dev/null +++ b/examples/adversial.py @@ -0,0 +1,240 @@ +# analyze_tables_extended.py +# Reproduce Table 6 (Fine-tuning robustness) and Table 7 (False positives) +# Matches Zhou et al. 2024 format + +import os, sys, copy +import numpy as np, pandas as pd +import torch, torch.nn.functional as F +from torch_geometric.data import Data +from torch_geometric.utils import subgraph +from sklearn.decomposition import PCA + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from run_table5 import ( + load_dataset, set_seed, build_model, + train_model, model_to_vector_probs, get_setting_architectures, COwn +) + +# ----------------------------- +# Config +# ----------------------------- +DEVICE = "cuda" if torch.cuda.is_available() else "cpu" +MODEL_TRAIN_EPOCHS = 80 +COWN_TRAIN_EPOCHS = 40 +FINETUNE_EPOCHS = 20 +INDEPENDENT_MODEL_EPOCHS = 40 +SEEDS = [0, 1, 2] + +# ----------------------------- +# Fine-tuning (FGSM-like) +# ----------------------------- +def finetune_model(model, data, train_mask, epochs=20, lr=0.005, device="cpu"): + model_ft = copy.deepcopy(model).to(device) + + data_adv = Data( + x=data.x.clone().detach().to(device), + edge_index=data.edge_index.clone().to(device), + y=data.y.clone().to(device) + ) + data_adv.x.requires_grad = True + opt = torch.optim.Adam(model_ft.parameters(), lr=lr, weight_decay=5e-4) + + for epoch in range(epochs): + model_ft.train() + opt.zero_grad() + out = model_ft(data_adv.x, data_adv.edge_index) + loss = F.cross_entropy(out[train_mask], data_adv.y[train_mask]) + loss.backward() + + with torch.no_grad(): + if data_adv.x.grad is not None: + epsilon = 0.02 * (epoch + 1) / epochs + grad_sign = data_adv.x.grad.sign() + data_adv.x.data = data_adv.x.data + epsilon * grad_sign + data_adv.x.grad.zero_() + + opt.step() + return model_ft + +# ----------------------------- +# Ownership verifier training +# ----------------------------- +def train_ownership_verifier(data, setting, device="cpu"): + in_dim, out_dim = data.num_features, len(torch.unique(data.y)) + Fs, Find, lFs, lFind = get_setting_architectures(setting) + + owner_vecs, independent_vecs = [], [] + + # Owner models + for seed in SEEDS: + set_seed(seed) + mask = torch.randperm(data.num_nodes)[:int(0.6 * data.num_nodes)] + train_mask = torch.zeros(data.num_nodes, dtype=torch.bool) + train_mask[mask] = True + for arch in Fs: + m = build_model(arch, in_dim, out_dim, lFs) + m = train_model(m, data, train_mask, epochs=MODEL_TRAIN_EPOCHS, device=device) + owner_vecs.append(model_to_vector_probs(m, data, torch.arange(data.num_nodes))) + + # Independent models + for seed in SEEDS: + set_seed(seed + 100) + mask = torch.randperm(data.num_nodes)[:int(0.3 * data.num_nodes)] + ind_mask = torch.zeros(data.num_nodes, dtype=torch.bool) + 
ind_mask[mask] = True + for arch in Find: + m = build_model(arch, in_dim, out_dim, lFind) + m = train_model(m, data, ind_mask, epochs=INDEPENDENT_MODEL_EPOCHS, device=device) + independent_vecs.append(model_to_vector_probs(m, data, torch.arange(data.num_nodes))) + + X_owner_np = np.vstack(owner_vecs) + X_ind_np = np.vstack(independent_vecs) + + # Reduce to 128-d + X_all = np.vstack([X_owner_np, X_ind_np]) + n_samples, n_features = X_all.shape + n_comp = min(128, n_samples, n_features) + if n_comp < n_features: + pca = PCA(n_components=n_comp) + X_all = pca.fit_transform(X_all) + if X_all.shape[1] < 128: + padding = np.zeros((X_all.shape[0], 128 - X_all.shape[1])) + X_all = np.hstack([X_all, padding]) + + n_owner = len(owner_vecs) + X_owner_np = X_all[:n_owner] + X_ind_np = X_all[n_owner:] + + # Train classifier + X_train = torch.tensor(X_all, dtype=torch.float32, device=device) + y_train = torch.tensor(np.hstack([np.ones(n_owner), np.zeros(len(X_ind_np))]), + dtype=torch.long, device=device) + cown = COwn(input_dim=128).to(device) + opt = torch.optim.Adam(cown.parameters(), lr=0.001) + + for epoch in range(COWN_TRAIN_EPOCHS): + cown.train() + opt.zero_grad() + logits = cown(X_train) + loss = F.cross_entropy(logits, y_train) + loss.backward() + opt.step() + + return cown, X_owner_np, X_ind_np + +# ----------------------------- +# Eval metrics (FPR, FNR, ACC) +# ----------------------------- +def evaluate_cown(cown, X_owner_np, X_ind_np, device="cpu"): + X_owner = torch.tensor(X_owner_np, dtype=torch.float32, device=device) + X_ind = torch.tensor(X_ind_np, dtype=torch.float32, device=device) + + cown.eval() + with torch.no_grad(): + preds_owner = cown(X_owner).argmax(dim=1).cpu().numpy() + preds_ind = cown(X_ind).argmax(dim=1).cpu().numpy() + + fnr = (preds_owner == 0).mean() * 100 + fpr = (preds_ind == 1).mean() * 100 + acc = ( (preds_owner == 1).sum() + (preds_ind == 0).sum() ) / (len(preds_owner)+len(preds_ind)) * 100 + return fpr, fnr, acc + +# ----------------------------- +# False positives (Table 7) +# ----------------------------- +def run_false_positive_experiment(data_orig, dataset_name, setting, cown, node_order, device="cpu", repeats=5): + in_dim, out_dim = data_orig.num_features, len(torch.unique(data_orig.y)) + Fs, Find, lFs, lFind = get_setting_architectures(setting) + + fpr_list = [] + for rep in range(repeats): + set_seed(rep + 500) + num_nodes = data_orig.num_nodes + independent_train = torch.randperm(num_nodes)[:int(0.3 * num_nodes)] + independent_mask = torch.zeros(num_nodes, dtype=torch.bool) + independent_mask[independent_train] = True + + independent_vecs = [] + for arch in Find: + m = build_model(arch, in_dim, out_dim, lFind) + m = train_model(m, data_orig, independent_mask, epochs=INDEPENDENT_MODEL_EPOCHS, device=device) + independent_vecs.append(model_to_vector_probs(m, data_orig, node_order)) + + X_independent_np = np.vstack(independent_vecs) + n_samples, n_features = X_independent_np.shape + n_comp = min(128, n_samples, n_features) + if n_comp < n_features: + pca = PCA(n_components=n_comp) + X_independent_np = pca.fit_transform(X_independent_np) + if X_independent_np.shape[1] < 128: + padding = np.zeros((X_independent_np.shape[0], 128 - X_independent_np.shape[1])) + X_independent_np = np.hstack([X_independent_np, padding]) + + X_independent = torch.tensor(X_independent_np, dtype=torch.float32, device=device) + cown.eval() + with torch.no_grad(): + preds = cown(X_independent).argmax(dim=1).cpu().numpy() + + fpr = (preds == 1).mean() * 100 + fpr_list.append(fpr) + + 
return np.mean(fpr_list), np.std(fpr_list) + +# ----------------------------- +# Generate Table 6 and Table 7 +# ----------------------------- +def generate_tables(all_results_csv="results/table5_all_results.csv"): + df = pd.read_csv(all_results_csv) + if "cown_acc_mean" not in df.columns: + raise KeyError("Expected 'cown_acc_mean' in all_results.csv") + + os.makedirs("results", exist_ok=True) + table6, table7 = [], [] + + for (ds, st, md), sub in df.groupby(["dataset", "setting", "mode"]): + print(f"\n=== {ds} / Setting {st} / Mode {md} ===") + + data, _ = load_dataset(ds, device=DEVICE) + num_nodes = data.num_nodes + train_nodes = torch.randperm(num_nodes)[:int(0.6 * num_nodes)] + train_mask = torch.zeros(num_nodes, dtype=torch.bool) + train_mask[train_nodes] = True + + # Train + fine-tune + Fs, Find, lFs, lFind = get_setting_architectures(st) + target_arch = Fs[0] if len(Fs) > 0 else "GCN" + m = build_model(target_arch, data.num_features, len(torch.unique(data.y)), lFs) + m = train_model(m, data, train_mask, epochs=MODEL_TRAIN_EPOCHS, device=DEVICE) + m_finetuned = finetune_model(m, data, train_mask, epochs=FINETUNE_EPOCHS, device=DEVICE) + + ori_acc = (m(data.x.to(DEVICE), data.edge_index.to(DEVICE)).argmax(dim=1) == data.y.to(DEVICE)).float().mean().item() * 100 + + # Train C_own + trained_cown, X_owner_np, X_ind_np = train_ownership_verifier(data, st, device=DEVICE) + fpr, fnr, acc_cown = evaluate_cown(trained_cown, X_owner_np, X_ind_np, device=DEVICE) + + # Table 6 + table6.append({ + "Dataset": ds, "Setting": st, "Mode": md, + "Ori_ACC(%)": round(ori_acc, 2), + "FPR(%)": round(fpr, 2), + "FNR(%)": round(fnr, 2), + "Fine_ACC(%)": round(acc_cown, 2) + }) + + # Table 7 + node_order = torch.arange(data.num_nodes) + fpr_mean, fpr_std = run_false_positive_experiment(data, ds, st, trained_cown, node_order, device=DEVICE) + table7.append({ + "Dataset": ds, "Setting": st, "Mode": md, + "FPR": f"{fpr_mean:.2f} ± {fpr_std:.2f}" + }) + + pd.DataFrame(table6).to_csv("results/table6.csv", index=False) + pd.DataFrame(table7).to_csv("results/table7.csv", index=False) + print("\n✅ Saved results/table6.csv and table7.csv") + + +# ----------------------------- +if __name__ == "__main__": + generate_tables() diff --git a/examples/adversial_table8.py b/examples/adversial_table8.py new file mode 100644 index 0000000..5c3b144 --- /dev/null +++ b/examples/adversial_table8.py @@ -0,0 +1,177 @@ +# analyze_table8_double_extraction.py +# Reproduce Table 8 (Double Extraction Robustness) +# Matches Zhou et al. 
2024 format
+
+import os, sys, copy
+import numpy as np, pandas as pd
+import torch, torch.nn.functional as F
+from torch_geometric.data import Data
+from sklearn.decomposition import PCA
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from run_table5 import (
+    load_dataset, set_seed, build_model,
+    train_model, model_to_vector_probs, get_setting_architectures, COwn
+)
+
+# -----------------------------
+# Config
+# -----------------------------
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+MODEL_TRAIN_EPOCHS = 80
+COWN_TRAIN_EPOCHS = 40
+EXTRACT_EPOCHS = 40
+SEEDS = [0, 1, 2]
+
+# -----------------------------
+# Double Extraction
+# -----------------------------
+def extract_once(target_model, data, epochs=EXTRACT_EPOCHS, device="cpu"):
+    """Perform a single extraction attack using pseudo-labels from target_model."""
+    target_model.eval()
+    with torch.no_grad():
+        logits = target_model(data.x.to(device), data.edge_index.to(device))
+        pseudo_labels = logits.argmax(dim=1).cpu()
+
+    extracted = build_model("GCN", data.num_features, len(torch.unique(data.y)), 2)
+    mask = torch.ones(data.num_nodes, dtype=torch.bool)
+    extracted = train_model(extracted, Data(x=data.x, edge_index=data.edge_index, y=pseudo_labels), mask, epochs=epochs, device=device)
+    return extracted
+
+
+def double_extract_model(target_model, data, epochs=EXTRACT_EPOCHS, device="cpu"):
+    """Perform two rounds of extraction: F -> Ft -> Fs."""
+    Ft = extract_once(target_model, data, epochs=epochs, device=device)
+    Fs = extract_once(Ft, data, epochs=epochs, device=device)
+    return Fs
+
+
+# -----------------------------
+# Ownership verifier training
+# -----------------------------
+def train_ownership_verifier(data, setting, device="cpu"):
+    in_dim, out_dim = data.num_features, len(torch.unique(data.y))
+    Fs, Find, lFs, lFind = get_setting_architectures(setting)
+    owner_vecs, independent_vecs = [], []
+
+    # Owner models
+    for seed in SEEDS:
+        set_seed(seed)
+        mask = torch.randperm(data.num_nodes)[:int(0.6 * data.num_nodes)]
+        train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
+        train_mask[mask] = True
+        for arch in Fs:
+            m = build_model(arch, in_dim, out_dim, lFs)
+            m = train_model(m, data, train_mask, epochs=MODEL_TRAIN_EPOCHS, device=device)
+            owner_vecs.append(model_to_vector_probs(m, data, torch.arange(data.num_nodes)))
+
+    # Independent models
+    for seed in SEEDS:
+        set_seed(seed + 100)
+        mask = torch.randperm(data.num_nodes)[:int(0.3 * data.num_nodes)]
+        ind_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
+        ind_mask[mask] = True
+        for arch in Find:
+            m = build_model(arch, in_dim, out_dim, lFind)
+            m = train_model(m, data, ind_mask, epochs=MODEL_TRAIN_EPOCHS, device=device)
+            independent_vecs.append(model_to_vector_probs(m, data, torch.arange(data.num_nodes)))
+
+    X_owner_np = np.vstack(owner_vecs)
+    X_ind_np = np.vstack(independent_vecs)
+    X_all = np.vstack([X_owner_np, X_ind_np])
+
+    n_samples, n_features = X_all.shape
+    n_comp = min(128, n_samples, n_features)
+    if n_comp < n_features:
+        pca = PCA(n_components=n_comp)
+        X_all = pca.fit_transform(X_all)
+    if X_all.shape[1] < 128:
+        padding = np.zeros((X_all.shape[0], 128 - X_all.shape[1]))
+        X_all = np.hstack([X_all, padding])
+
+    n_owner = len(owner_vecs)
+    X_owner_np = X_all[:n_owner]
+    X_ind_np = X_all[n_owner:]
+
+    X_train = torch.tensor(X_all, dtype=torch.float32, device=device)
+    y_train = torch.tensor(np.hstack([np.ones(n_owner), np.zeros(len(X_ind_np))]),
+                           dtype=torch.long, device=device)
+    cown = COwn(input_dim=128).to(device)
+    opt = 
torch.optim.Adam(cown.parameters(), lr=0.001) + + for epoch in range(COWN_TRAIN_EPOCHS): + cown.train() + opt.zero_grad() + logits = cown(X_train) + loss = F.cross_entropy(logits, y_train) + loss.backward() + opt.step() + + return cown, X_owner_np, X_ind_np + + +# ----------------------------- +# Eval metrics (FPR, FNR, ACC) +# ----------------------------- +def evaluate_cown(cown, X_owner_np, X_ind_np, device="cpu"): + X_owner = torch.tensor(X_owner_np, dtype=torch.float32, device=device) + X_ind = torch.tensor(X_ind_np, dtype=torch.float32, device=device) + cown.eval() + with torch.no_grad(): + preds_owner = cown(X_owner).argmax(dim=1).cpu().numpy() + preds_ind = cown(X_ind).argmax(dim=1).cpu().numpy() + fnr = (preds_owner == 0).mean() * 100 + fpr = (preds_ind == 1).mean() * 100 + acc = ((preds_owner == 1).sum() + (preds_ind == 0).sum()) / (len(preds_owner) + len(preds_ind)) * 100 + return fpr, fnr, acc + + +# ----------------------------- +# Generate Table 8 +# ----------------------------- +def generate_table8(all_results_csv="results/table5_all_results.csv"): + df = pd.read_csv(all_results_csv) + if "cown_acc_mean" not in df.columns: + raise KeyError("Expected 'cown_acc_mean' in all_results.csv") + + os.makedirs("results", exist_ok=True) + table8 = [] + + for (ds, st, md), sub in df.groupby(["dataset", "setting", "mode"]): + print(f"\n=== {ds} / Setting {st} / Mode {md} ===") + data, _ = load_dataset(ds, device=DEVICE) + num_nodes = data.num_nodes + train_nodes = torch.randperm(num_nodes)[:int(0.6 * num_nodes)] + train_mask = torch.zeros(num_nodes, dtype=torch.bool) + train_mask[train_nodes] = True + + # Train base target + Fs, Find, lFs, lFind = get_setting_architectures(st) + target_arch = Fs[0] if len(Fs) > 0 else "GCN" + m = build_model(target_arch, data.num_features, len(torch.unique(data.y)), lFs) + m = train_model(m, data, train_mask, epochs=MODEL_TRAIN_EPOCHS, device=DEVICE) + + ori_acc = (m(data.x.to(DEVICE), data.edge_index.to(DEVICE)).argmax(dim=1) == data.y.to(DEVICE)).float().mean().item() * 100 + + # Perform double extraction + m_double = double_extract_model(m, data, epochs=EXTRACT_EPOCHS, device=DEVICE) + + # Train ownership verifier + trained_cown, X_owner_np, X_ind_np = train_ownership_verifier(data, st, device=DEVICE) + fpr, fnr, acc_cown = evaluate_cown(trained_cown, X_owner_np, X_ind_np, device=DEVICE) + + table8.append({ + "Dataset": ds, "Setting": st, "Mode": md, + "Ori_ACC(%)": round(ori_acc, 2), + "FPR(%)": round(fpr, 2), + "FNR(%)": round(fnr, 2), + "Double_ACC(%)": round(acc_cown, 2) + }) + + pd.DataFrame(table8).to_csv("results/table8.csv", index=False) + print("\n✅ Saved results/table8.csv") + + +# ----------------------------- +if __name__ == "__main__": + generate_table8() diff --git a/examples/run_bboxve.py b/examples/run_bboxve.py new file mode 100644 index 0000000..79f544a --- /dev/null +++ b/examples/run_bboxve.py @@ -0,0 +1,164 @@ +""" +run_bboxve.py — Backdoor-based Ownership Verification (BBoxVe) in PyG. + +This script: +- Injects a backdoor watermark trigger into node features. +- Trains a target model and an extracted surrogate model. +- Evaluates clean and backdoor performance (TCA, TBA, ECA, EBA). +- Loops over datasets and models automatically. 
+- Saves all results to results/BboxVe_results.csv +""" + +import os, sys +import torch +import random +import numpy as np +import pandas as pd +import torch.nn.functional as F +from torch_geometric.datasets import Planetoid +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from pygip.models.nn.pyg_backbones import GCN, GAT, GraphSAGE, GIN, SGC + +# from torch_geometric.nn import GINConv, SGConv +import torch.nn as nn + + + + + +# ---------------------------- +# Helpers +# ---------------------------- +def set_seed(seed=0): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(seed) + + +def inject_backdoor(data, node_indices, num_features, fixed_val=10, trigger_size=35): + """Inject backdoor trigger on selected nodes.""" + poisoned_x = data.x.clone() + poisoned_y = data.y.clone() + least_class = torch.bincount(data.y).argmin() + + for idx in node_indices: + feat_ids = torch.randperm(num_features)[:trigger_size] + poisoned_x[idx, feat_ids] = fixed_val + poisoned_y[idx] = least_class + + return poisoned_x, poisoned_y + + +def train_model(model, data, train_idx, epochs=50, lr=0.01, device="cpu"): + model = model.to(device) + data = data.to(device) + opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4) + + for epoch in range(epochs): + model.train() + opt.zero_grad() + out = model(data.x, data.edge_index) + loss = F.cross_entropy(out[train_idx], data.y[train_idx]) + loss.backward() + opt.step() + + return model + + +def evaluate(model, data, clean_idx, backdoor_idx): + model.eval() + with torch.no_grad(): + logits = model(data.x, data.edge_index) + preds = logits.argmax(dim=1) + + clean_acc = (preds[clean_idx] == data.y[clean_idx]).float().mean().item() + backdoor_acc = (preds[backdoor_idx] == data.y[backdoor_idx]).float().mean().item() + + return clean_acc * 100, backdoor_acc * 100 + + +# ---------------------------- +# Main Experiment +# ---------------------------- +def run_experiment(dataset_name, model_type, with_backdoor=True, device="cpu"): + dataset = Planetoid(root=f"data/{dataset_name}", name=dataset_name) + data = dataset[0].to(device) + num_nodes = data.num_nodes + + idx = torch.randperm(num_nodes) + train_idx = idx[: int(0.2 * num_nodes)] + surr_idx = idx[int(0.2 * num_nodes): int(0.6 * num_nodes)] + test_idx = idx[int(0.6 * num_nodes):] + + bd_train_idx = train_idx[torch.randperm(len(train_idx))[: int(0.15 * len(train_idx))]] + bd_test_idx = test_idx[torch.randperm(len(test_idx))[: int(0.10 * len(test_idx))]] + + if with_backdoor: + data.x, data.y = inject_backdoor(data, bd_train_idx, dataset.num_features) + data.x, data.y = inject_backdoor(data, bd_test_idx, dataset.num_features) + + # Select model + if model_type == "GCN": + model_fn = lambda: GCN(dataset.num_features, 64, dataset.num_classes) + elif model_type == "GAT": + model_fn = lambda: GAT(dataset.num_features, 64, dataset.num_classes) + elif model_type == "GraphSAGE": + model_fn = lambda: GraphSAGE(dataset.num_features, 64, dataset.num_classes) + elif model_type == "GIN": + model_fn = lambda: GIN(dataset.num_features, 64, dataset.num_classes) + elif model_type == "SGC": + model_fn = lambda: SGC(dataset.num_features, dataset.num_classes) + else: + raise ValueError(f"Unknown model type: {model_type}") + + target = train_model(model_fn(), data, train_idx, device=device) + + surr_data = data if with_backdoor else dataset[0].clone() + surrogate = train_model(model_fn(), surr_data, surr_idx, 
device=device) + + clean_idx = torch.tensor(list(set(test_idx.tolist()) - set(bd_test_idx.tolist())), dtype=torch.long) + TCA, TBA = evaluate(target, data, clean_idx, bd_test_idx) + ECA, EBA = evaluate(surrogate, data, clean_idx, bd_test_idx) + + return { + "Dataset": dataset_name, + "Model": model_type, + "Setting": "With Backdoor" if with_backdoor else "Without Backdoor", + "TCA": TCA, + "ECA": ECA, + "TBA": TBA, + "EBA": EBA + } + + +# ---------------------------- +# Runner +# ---------------------------- +if __name__ == "__main__": + set_seed(0) + device = "cuda" if torch.cuda.is_available() else "cpu" + os.makedirs("results", exist_ok=True) + out_file = "results/BboxVe_results.csv" + + datasets = ["Cora", "CiteSeer", "PubMed"] + models = ["GCN", "GAT", "GraphSAGE", "GIN", "SGC"] + + all_results = [] + + for dataset in datasets: + for model_type in models: + print(f"\n=== Running {dataset} | {model_type} | With Backdoor ===") + res = run_experiment(dataset, model_type, with_backdoor=True, device=device) + all_results.append(res) + + df = pd.DataFrame(all_results) + if os.path.exists(out_file): + df.to_csv(out_file, mode="a", header=False, index=False) + else: + df.to_csv(out_file, index=False) + + print("\n=== All Table 3 Rows Added ===") + print(df) diff --git a/examples/run_bgrove.py b/examples/run_bgrove.py new file mode 100644 index 0000000..e8e2c86 --- /dev/null +++ b/examples/run_bgrove.py @@ -0,0 +1,208 @@ +""" +examples/run_bgrove.py + +Integration of BGrOVe experiment (Table 4 reproduction) using PyGIP datasets and models. +- Preserves original evaluation: FPR, FNR, ACC +- Uses same dataset/model structure as main framework +""" + +import os, sys +import random +import numpy as np +import pandas as pd +import torch +import dgl +import torch.nn.functional as F +from sklearn.metrics.pairwise import cosine_similarity +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +# integrate with PyGIP +from pygip.datasets.pyg_datasets import Cora, CiteSeer, PubMed, DBLP, Amazon +from pygip.models.nn.pyg_backbones import GCN, GAT, GraphSAGE, GIN, SGC + + +# ---------------------------- +# Helpers +# ---------------------------- +def set_seed(seed=0): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(seed) + + +def train_model(model, data, train_mask, epochs=50, lr=0.01, device="cpu"): + model = model.to(device) + data = data.to(device) + optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4) + for epoch in range(epochs): + model.train() + optimizer.zero_grad() + out = model(data.x, data.edge_index) + loss = F.cross_entropy(out[train_mask], data.y[train_mask]) + loss.backward() + optimizer.step() + return model + + +def get_posteriors(model, data, nodes): + model.eval() + with torch.no_grad(): + logits = model(data.x, data.edge_index)[nodes] + probs = F.softmax(logits, dim=1).cpu().numpy() + return probs + + +def compute_metrics(true_labels, pred_labels): + true_labels = np.array(true_labels) + pred_labels = np.array(pred_labels) + FP = np.sum((pred_labels == 1) & (true_labels == 0)) + FN = np.sum((pred_labels == 0) & (true_labels == 1)) + TN = np.sum((pred_labels == 0) & (true_labels == 0)) + TP = np.sum((pred_labels == 1) & (true_labels == 1)) + FPR = FP / (FP + TN + 1e-8) * 100 + FNR = FN / (FN + TP + 1e-8) * 100 + ACC = (TP + TN) / (TP + TN + FP + FN + 1e-8) * 100 + return FPR, FNR, ACC + + +# ---------------------------- +# Model Builder +# 
---------------------------- +def build_model(model_type, in_dim, out_dim, layers=2): + if model_type == "GCN": + return GCN(in_dim, 16, out_dim) + elif model_type == "GraphSAGE": + return GraphSAGE(in_dim, 16, out_dim) + elif model_type == "GAT": + return GAT(in_dim, 16, out_dim) + elif model_type == "GIN": + return GIN(in_dim, 16, out_dim) + elif model_type == "SGC": + return SGC(in_dim, out_dim) + else: + raise ValueError(f"Unknown model type: {model_type}") + +# ---------------------------- +# Threshold tuning +# ---------------------------- +def tune_threshold(Fs_star, Fs, Find, data, query_nodes): + scores, labels = [], [] + for star in Fs_star: + probs_star = get_posteriors(star, data, query_nodes) + for surrogate in Fs: + sim = cosine_similarity(probs_star, get_posteriors(surrogate, data, query_nodes)).mean() + scores.append(sim) + labels.append(1) + for ind in Find: + sim = cosine_similarity(probs_star, get_posteriors(ind, data, query_nodes)).mean() + scores.append(sim) + labels.append(0) + best_thr, best_acc = 0.5, 0 + for thr in np.linspace(0.1, 0.99, 50): + preds = [1 if s > thr else 0 for s in scores] + _, _, acc = compute_metrics(labels, preds) + if acc > best_acc: + best_acc, best_thr = acc, thr + return best_thr + + +# ---------------------------- +# Main Experiment +# ---------------------------- +def run_bgrove_experiment(dataset_cls, condition="CondA ✓", setting="I", device="cpu"): + ds = dataset_cls(path="./data") + data = ds.graph_data.to(device) + in_dim, out_dim = ds.num_features, ds.num_classes + train_mask = data.train_mask + + overlapping = ["GCN", "GAT", "GraphSAGE"] + disjoint = ["GIN", "SGC"] + layers_same, layers_diff = 2, 3 + + if setting == "I": + arch_Fs, arch_Find = overlapping, overlapping + nFs, nFind = layers_same, layers_same + elif setting == "II": + arch_Fs, arch_Find = overlapping, overlapping + nFs, nFind = layers_diff, layers_same + elif setting == "III": + arch_Fs, arch_Find = disjoint, overlapping + nFs, nFind = layers_same, layers_same + elif setting == "IV": + arch_Fs, arch_Find = disjoint, overlapping + nFs, nFind = layers_diff, layers_same + else: + raise ValueError("Invalid setting") + + target = train_model(build_model("GCN", in_dim, out_dim, 2), data, train_mask, device=device) + + Fs = [train_model(build_model(a, in_dim, out_dim, nFs), data, train_mask, device=device) + for a in arch_Fs] + set_seed(123 if condition != "CondA ✓" else 0) + Fs_star = [train_model(build_model(a, in_dim, out_dim, nFs), data, train_mask, device=device) + for a in arch_Fs] + Find = [train_model(build_model(a, in_dim, out_dim, nFind), data, train_mask, device=device) + for a in arch_Find] + + num_queries = max(1, int(0.1 * data.num_nodes)) + query_nodes = torch.randperm(data.num_nodes)[:num_queries] + thr = tune_threshold(Fs_star, Fs, Find, data, query_nodes) + + true_labels, pred_labels = [], [] + for model in Fs + Find: + for star in Fs_star: + sim = cosine_similarity( + get_posteriors(model, data, query_nodes), + get_posteriors(star, data, query_nodes) + ).mean() + true_labels.append(1 if model in Fs else 0) + pred_labels.append(1 if sim > thr else 0) + return compute_metrics(true_labels, pred_labels) + + +# ---------------------------- +# Multi-seed Runner +# ---------------------------- +def run_multi(dataset_cls, condition, setting, device="cpu", seeds=[0, 1, 2, 3, 4]): + all_fpr, all_fnr, all_acc = [], [], [] + for seed in seeds: + set_seed(seed) + FPR, FNR, ACC = run_bgrove_experiment(dataset_cls, condition, setting, device) + all_fpr.append(FPR) + 
all_fnr.append(FNR) + all_acc.append(ACC) + fmt = lambda arr: f"{np.mean(arr):.2f} ± {np.std(arr):.2f}" + return fmt(all_fpr), fmt(all_fnr), fmt(all_acc) + + +# ---------------------------- +# Main Entry +# ---------------------------- +if __name__ == "__main__": + datasets = [Cora, CiteSeer, PubMed, DBLP, Amazon] + conditions = ["CondA ✓", "CondA ✗"] + settings = ["I", "II", "III", "IV"] + + total = len(datasets) * len(conditions) * len(settings) + results = {} + count = 0 + + for DatasetClass in datasets: + for cond in conditions: + for setting in settings: + count += 1 + print(f"\n=== [{count}/{total}] {DatasetClass.__name__}, {cond}, Setting {setting} ===") + FPR, FNR, ACC = run_multi(DatasetClass, cond, setting) + results[(DatasetClass.__name__, cond, setting)] = [FPR, FNR, ACC] + + df = pd.DataFrame.from_dict(results, orient="index", columns=["FPR (%)", "FNR (%)", "ACC (%)"]) + df.index = pd.MultiIndex.from_tuples(df.index, names=["Dataset", "Condition", "Setting"]) + + print("\n=== Table 4: BGrOVe Results (mean ± std) ===") + print(df) + os.makedirs("results", exist_ok=True) + path = "results/BGrOVe_table4.csv" + df.to_csv(path) + print(f"\n✅ Results saved to {path}") diff --git a/examples/run_table5.py b/examples/run_table5.py new file mode 100644 index 0000000..f251272 --- /dev/null +++ b/examples/run_table5.py @@ -0,0 +1,288 @@ +# run_table5_full.py +# Rewritten to reproduce Figure 3 & Table 5 from Zhou et al. (2024) with aggregation + stability fixes + +import os, random, numpy as np, pandas as pd, sys +import torch, torch.nn as nn, torch.nn.functional as F +from torch_geometric.datasets import Planetoid, Amazon, CitationFull +from torch_geometric.data import Data +from sklearn.model_selection import train_test_split +import matplotlib.pyplot as plt + + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from pygip.models.nn.pyg_backbones import GCN, GAT, GraphSAGE, GIN, SGC + +# ---------------------------- +# Config +# ---------------------------- +SEEDS = [0, 1] +NUM_INDEP = 3 # fewer independent models +NUM_SURR = 3 # fewer surrogates +MODEL_TRAIN_EPOCHS = 40 +SURR_TRAIN_EPOCHS = 40 +COWN_TRAIN_EPOCHS = 20 +MASK_RATIOS = [0.0, 0.1, 0.2, 0.4] + + +# ---------------------------- +# Helpers +# ---------------------------- +def set_seed(seed=0): + random.seed(seed); np.random.seed(seed) + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(seed) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + +def load_dataset(name, device="cpu"): + lname = name.lower() + if lname in [ "pubmed","cora","citeseer"]: + dataset = Planetoid(root=f"data/{name}", name=name) + data = dataset[0].to(device) + elif "amazon" in lname: + sub = "Photo" if "photo" in lname else "Computers" + dataset = Amazon(root=f"data/{lname}", name=sub) + data = dataset[0].to(device) + elif lname in ["dblp","db_lp","db-lp"]: + dataset = CitationFull(root="data/dblp", name="dblp") + data = dataset[0].to(device) + else: + raise ValueError(f"Unknown dataset {name}") + return data, dataset + +def split_nodes(num_nodes, ratios=(0.3,0.3,0.3,0.1), seed=0): + rng = np.random.RandomState(seed) + perm = rng.permutation(num_nodes) + sizes = [int(r*num_nodes) for r in ratios] + sizes[-1] = num_nodes - sum(sizes[:-1]) + splits, names, start = {}, ["train","dshadow","dsurr","dtest"], 0 + for name, sz in zip(names, sizes): + idx = perm[start:start+sz] + mask = torch.zeros(num_nodes, dtype=torch.bool); mask[idx] = True + splits[name] = 
mask; start += sz + return splits + +def filter_edges_to_mask(data, mask): + ei = data.edge_index; mask = mask.to(ei.device) + keep = ((mask[ei[0]] == True) & (mask[ei[1]] == True)) + return ei[:, keep] + +def mask_features_global(data, mask_ratio=0.1, seed=0): + x = data.x.clone(); num_feats = x.size(1) + k = max(1, int(mask_ratio * num_feats)) + rng = np.random.RandomState(seed) + feat_idx = rng.choice(num_feats, k, replace=False) + x[:, feat_idx] = 0.0 + data2 = Data(x=x, edge_index=data.edge_index.clone(), y=data.y.clone()) + return data2, feat_idx + +# ---------------------------- +# Models & Training +# ---------------------------- +def build_model(model_type, in_dim, out_dim, layers=2): + cls_map = {"GCN": GCN, "GraphSAGE": GraphSAGE, "GAT": GAT, "GIN": GIN, "SGC": SGC} + cls = cls_map[model_type] + try: + return cls(in_channels=in_dim, out_channels=out_dim, num_layers=layers) + except TypeError: + return cls(in_dim, out_dim, layers) + +def train_model(model, data, train_mask, epochs=200, lr=0.01, device="cpu"): + model = model.to(device); data = data.to(device) + opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4) + for _ in range(epochs): + model.train(); opt.zero_grad() + out = model(data.x, data.edge_index) + loss = F.cross_entropy(out[train_mask], data.y[train_mask]) + loss.backward(); opt.step() + return model + +def train_with_soft_labels(model, data, train_mask, soft_targets, epochs=200, lr=0.01, device="cpu"): + model = model.to(device); data = data.to(device) + soft_targets = soft_targets.to(device) + opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4) + for _ in range(epochs): + model.train(); opt.zero_grad() + out = F.log_softmax(model(data.x, data.edge_index), dim=1) + loss = F.kl_div(out[train_mask], soft_targets[train_mask], reduction='batchmean') + loss.backward(); opt.step() + return model + +def compute_accuracy(model, data, mask): + model.eval() + with torch.no_grad(): + logits = model(data.x, data.edge_index) + pred = logits.argmax(dim=1) + return (pred[mask] == data.y[mask]).float().mean().item() * 100 + +def compute_fidelity(model, target, data, mask): + model.eval(); target.eval() + with torch.no_grad(): + pred_m = model(data.x, data.edge_index).argmax(dim=1) + pred_t = target(data.x, data.edge_index).argmax(dim=1) + return (pred_m[mask] == pred_t[mask]).float().mean().item() * 100 + +# ---------------------------- +# Holistic vectors & C_own +# ---------------------------- +def model_to_vector_probs(model, data, node_order=None): + model.eval() + with torch.no_grad(): + probs = F.softmax(model(data.x, data.edge_index), dim=1).cpu() + if node_order is None: + node_order = torch.arange(probs.size(0)) + return probs[node_order].reshape(-1).numpy() + +class COwn(nn.Module): + def __init__(self, input_dim): + super().__init__() + self.net = nn.Sequential( + nn.Linear(input_dim, 128), nn.ReLU(), + nn.Linear(128, 64), nn.ReLU(), + nn.Linear(64, 2) + ) + def forward(self, x): return self.net(x) + +# ---------------------------- +# Settings mapping (I–IV) +# ---------------------------- +def get_setting_architectures(setting): + overlapping, disjoint = ["GCN","GAT","GraphSAGE"], ["GIN","SGC"] + l_same, l_diff = 2, 3 + if setting == "I": Fs, Find, lFs, lFind = overlapping, overlapping, l_same, l_same + elif setting == "II": Fs, Find, lFs, lFind = overlapping, overlapping, l_diff, l_same + elif setting == "III": Fs, Find, lFs, lFind = disjoint, overlapping, l_same, l_same + elif setting == "IV": Fs, Find, lFs, lFind = disjoint, 
overlapping, l_diff, l_same + else: raise ValueError("Invalid setting") + return Fs, Find, lFs, lFind + +# ---------------------------- +# Main experiment (Table 5 / Fig 3) +# ---------------------------- +def run_table5_full(dataset_name, setting="I", inductive=False, device="cpu"): + data_orig, dataset = load_dataset(dataset_name, device=device) + in_dim, out_dim = dataset.num_features, dataset.num_classes + Fs, Find, lFs, lFind = get_setting_architectures(setting) + + results = [] + for seed in SEEDS: + set_seed(seed) + splits = split_nodes(data_orig.num_nodes, seed=seed) + node_order = torch.where(splits["train"])[0] + + # baseline target + base_model = build_model("GCN", in_dim, out_dim, 2) + base_model = train_model(base_model, data_orig, splits["train"], + epochs=MODEL_TRAIN_EPOCHS, device=device) + base_acc = compute_accuracy(base_model, data_orig, splits["dtest"]) + + for mask_ratio in MASK_RATIOS: + data_masked, _ = mask_features_global(data_orig, mask_ratio, seed=seed) + + # train masked target + tgt = build_model("GCN", in_dim, out_dim, 2) + tgt = train_model(tgt, data_masked, splits["train"], epochs=MODEL_TRAIN_EPOCHS, device=device) + tgt_acc = compute_accuracy(tgt, data_masked, splits["dtest"]) + drop = base_acc - tgt_acc + print(f"[{dataset_name}-{setting}-seed{seed}] Mask={mask_ratio:.2f}, acc={tgt_acc:.2f}, drop={drop:.2f}") + + # Independents + indep_vecs, indep_accs = [], [] + for arch in Find: + for j in range(NUM_INDEP): + m = build_model(arch, in_dim, out_dim, lFind) + m = train_model(m, data_masked, splits["train"], epochs=MODEL_TRAIN_EPOCHS, device=device) + indep_accs.append(compute_accuracy(m, data_masked, splits["dtest"])) + indep_vecs.append(model_to_vector_probs(m, data_masked, node_order)) + + # Surrogates + with torch.no_grad(): + soft_all = F.softmax(tgt(data_masked.x, data_masked.edge_index), dim=1).cpu() + + surr_vecs, surr_accs, surr_fids = [], [], [] + for arch in Fs: + for j in range(NUM_SURR): + m = build_model(arch, in_dim, out_dim, lFs) + m = train_with_soft_labels(m, data_masked, splits["train"], soft_all, + epochs=SURR_TRAIN_EPOCHS, device=device) + surr_accs.append(compute_accuracy(m, data_masked, splits["dtest"])) + surr_fids.append(compute_fidelity(m, tgt, data_masked, splits["dtest"])) + surr_vecs.append(model_to_vector_probs(m, data_masked, node_order)) + + # Ownership classifier (full batch training for stability) + X = np.vstack(indep_vecs + surr_vecs) + y = np.array([0]*len(indep_vecs) + [1]*len(surr_vecs)) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.3, stratify=y, random_state=seed + ) + cown = COwn(X.shape[1]).to(device) + opt = torch.optim.Adam(cown.parameters(), lr=0.001, weight_decay=1e-4) + X_train_t, y_train_t = torch.tensor(X_train,dtype=torch.float32,device=device), torch.tensor(y_train,dtype=torch.long,device=device) + X_test_t, y_test_t = torch.tensor(X_test,dtype=torch.float32,device=device), torch.tensor(y_test,dtype=torch.long,device=device) + + for epoch in range(COWN_TRAIN_EPOCHS): + cown.train() + out = cown(X_train_t) + loss = F.cross_entropy(out, y_train_t) + opt.zero_grad(); loss.backward(); opt.step() + + with torch.no_grad(): + preds = cown(X_test_t).argmax(dim=1).cpu().numpy() + c_acc = (preds == y_test).mean()*100 + print(f"[{dataset_name}-{setting}-seed{seed}] C_own acc={c_acc:.2f}") + + # save + results.append({ + "dataset": dataset_name, + "setting": setting, + "mode": "Inductive" if inductive else "Transductive", + "seed": seed, + "mask_ratio": mask_ratio, + "target_acc": tgt_acc, 
+                "indep_acc_mean": np.mean(indep_accs),
+                "surr_acc_mean": np.mean(surr_accs),
+                "surr_fid_mean": np.mean(surr_fids),
+                "cown_acc": c_acc
+            })
+
+    return pd.DataFrame(results)
+
+# ----------------------------
+# Driver
+# ----------------------------
+if __name__ == "__main__":
+    os.makedirs("results", exist_ok=True)
+    datasets, settings = ["Cora","CiteSeer","PubMed","Amazon","dblp"], ["I","II","III","IV"]
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    all_results = []
+
+    for ds in datasets:
+        for st in settings:
+            for mode in [False, True]:  # transductive=False, inductive=True
+                df = run_table5_full(dataset_name=ds, setting=st, inductive=mode, device=device)
+                all_results.append(df)
+
+    all_results = pd.concat(all_results, ignore_index=True)
+    all_results.to_csv("results/all_results_per_seed.csv", index=False)
+
+    # --- Aggregation for analyze_tables_extended.py ---
+    agg = all_results.groupby(["dataset","setting","mode"]).agg({
+        "target_acc": ["mean","std"],
+        "indep_acc_mean": ["mean","std"],
+        "surr_acc_mean": ["mean","std"],
+        "surr_fid_mean": ["mean","std"],
+        "cown_acc": ["mean","std"]
+    }).reset_index()
+
+    agg.columns = [
+        "dataset","setting","mode",
+        "target_acc_mean","target_acc_std",
+        "indep_acc_mean","indep_acc_std",
+        "surr_acc_mean","surr_acc_std",
+        "surr_fid_mean","surr_fid_std",
+        "cown_acc_mean","cown_acc_std"
+    ]
+    agg.to_csv("results/table5_all_results.csv", index=False)
+
+    print("✅ Saved results/all_results_per_seed.csv and results/table5_all_results.csv (aggregated)")

From a153377b50a8d40e057ab03b47c1dddacfe77852 Mon Sep 17 00:00:00 2001
From: Iqra171
Date: Wed, 8 Oct 2025 17:57:11 +0500
Subject: [PATCH 08/22] Create desc

---
 results/desc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 results/desc

diff --git a/results/desc b/results/desc
new file mode 100644
index 0000000..9b4db40
--- /dev/null
+++ b/results/desc
@@ -0,0 +1 @@
+This folder contains the results of all the experiments carried out.
From 593099de21422b7e94e295b305d46557cc71fb58 Mon Sep 17 00:00:00 2001 From: Iqra171 Date: Wed, 8 Oct 2025 17:57:50 +0500 Subject: [PATCH 09/22] Add files via upload --- results/BGrOVe_table4.csv | 41 ++++++++++++++++++++++++++++++++++ results/CORA_table3.csv | 7 ++++++ results/CiteSeer_Table3.csv | 8 +++++++ results/table5_all_results.csv | 41 ++++++++++++++++++++++++++++++++++ results/table6_latest.csv | 41 ++++++++++++++++++++++++++++++++++ results/table7.csv | 41 ++++++++++++++++++++++++++++++++++ results/table8.csv | 41 ++++++++++++++++++++++++++++++++++ 7 files changed, 220 insertions(+) create mode 100644 results/BGrOVe_table4.csv create mode 100644 results/CORA_table3.csv create mode 100644 results/CiteSeer_Table3.csv create mode 100644 results/table5_all_results.csv create mode 100644 results/table6_latest.csv create mode 100644 results/table7.csv create mode 100644 results/table8.csv diff --git a/results/BGrOVe_table4.csv b/results/BGrOVe_table4.csv new file mode 100644 index 0000000..41c1245 --- /dev/null +++ b/results/BGrOVe_table4.csv @@ -0,0 +1,41 @@ +Dataset,Condition,Setting,FPR (%),FNR (%),ACC (%) +Cora,CondA ✓,I,91.11 ± 17.78,4.44 ± 8.89,52.22 ± 4.44 +Cora,CondA ✓,II,100.00 ± 0.00,0.00 ± 0.00,50.00 ± 0.00 +Cora,CondA ✓,III,0.00 ± 0.00,70.00 ± 10.00,72.00 ± 4.00 +Cora,CondA ✓,IV,0.00 ± 0.00,70.00 ± 10.00,72.00 ± 4.00 +Cora,CondA ✗,I,86.67 ± 10.89,6.67 ± 5.44,53.33 ± 2.72 +Cora,CondA ✗,II,100.00 ± 0.00,0.00 ± 0.00,50.00 ± 0.00 +Cora,CondA ✗,III,0.00 ± 0.00,70.00 ± 10.00,72.00 ± 4.00 +Cora,CondA ✗,IV,0.00 ± 0.00,70.00 ± 10.00,72.00 ± 4.00 +CiteSeer,CondA ✓,I,71.11 ± 29.48,20.00 ± 24.75,54.44 ± 4.16 +CiteSeer,CondA ✓,II,100.00 ± 0.00,0.00 ± 0.00,50.00 ± 0.00 +CiteSeer,CondA ✓,III,0.00 ± 0.00,75.00 ± 0.00,70.00 ± 0.00 +CiteSeer,CondA ✓,IV,0.00 ± 0.00,70.00 ± 10.00,72.00 ± 4.00 +CiteSeer,CondA ✗,I,100.00 ± 0.00,0.00 ± 0.00,50.00 ± 0.00 +CiteSeer,CondA ✗,II,100.00 ± 0.00,0.00 ± 0.00,50.00 ± 0.00 +CiteSeer,CondA ✗,III,0.00 ± 0.00,75.00 ± 0.00,70.00 ± 0.00 +CiteSeer,CondA ✗,IV,0.00 ± 0.00,75.00 ± 0.00,70.00 ± 0.00 +Amazon,CondA ✓,I,71.11 ± 36.92,20.00 ± 29.31,54.44 ± 4.16 +Amazon,CondA ✓,II,53.33 ± 42.40,26.67 ± 25.92,60.00 ± 8.89 +Amazon,CondA ✓,III,0.00 ± 0.00,30.00 ± 10.00,88.00 ± 4.00 +Amazon,CondA ✓,IV,3.33 ± 6.67,35.00 ± 12.25,84.00 ± 4.90 +Amazon,CondA ✗,I,93.33 ± 8.89,0.00 ± 0.00,53.33 ± 4.44 +Amazon,CondA ✗,II,24.44 ± 38.75,55.56 ± 31.43,60.00 ± 6.48 +Amazon,CondA ✗,III,0.00 ± 0.00,35.00 ± 12.25,86.00 ± 4.90 +Amazon,CondA ✗,IV,0.00 ± 0.00,40.00 ± 12.25,84.00 ± 4.90 +DBLP,CondA ✓,I,91.11 ± 12.96,4.44 ± 8.89,52.22 ± 2.72 +DBLP,CondA ✓,II,100.00 ± 0.00,0.00 ± 0.00,50.00 ± 0.00 +DBLP,CondA ✓,III,6.67 ± 8.16,60.00 ± 20.00,72.00 ± 4.00 +DBLP,CondA ✓,IV,13.33 ± 12.47,45.00 ± 18.71,74.00 ± 4.90 +DBLP,CondA ✗,I,100.00 ± 0.00,0.00 ± 0.00,50.00 ± 0.00 +DBLP,CondA ✗,II,91.11 ± 17.78,6.67 ± 13.33,51.11 ± 2.22 +DBLP,CondA ✗,III,0.00 ± 0.00,75.00 ± 0.00,70.00 ± 0.00 +DBLP,CondA ✗,IV,6.67 ± 8.16,30.00 ± 24.49,84.00 ± 10.20 +PubMed,CondA ✓,I,97.78 ± 4.44,0.00 ± 0.00,51.11 ± 2.22 +PubMed,CondA ✓,II,100.00 ± 0.00,0.00 ± 0.00,50.00 ± 0.00 +PubMed,CondA ✓,III,0.00 ± 0.00,75.00 ± 0.00,70.00 ± 0.00 +PubMed,CondA ✓,IV,0.00 ± 0.00,75.00 ± 0.00,70.00 ± 0.00 +PubMed,CondA ✗,I,100.00 ± 0.00,0.00 ± 0.00,50.00 ± 0.00 +PubMed,CondA ✗,II,100.00 ± 0.00,0.00 ± 0.00,50.00 ± 0.00 +PubMed,CondA ✗,III,0.00 ± 0.00,75.00 ± 0.00,70.00 ± 0.00 +PubMed,CondA ✗,IV,0.00 ± 0.00,75.00 ± 0.00,70.00 ± 0.00 \ No newline at end of file diff --git a/results/CORA_table3.csv b/results/CORA_table3.csv new file mode 100644 
index 0000000..4bb6274 --- /dev/null +++ b/results/CORA_table3.csv @@ -0,0 +1,7 @@ +Dataset,Model,Setting,TCA,ECA,TBA,EBA +Cora,GAT,With Backdoor,73.97540808,81.65983558,4.629629478,4.629629478 +Cora,GAT,Without Backdoor,79.30327654,79.50819731,77.7777791,83.33333135 +Cora,GCN,With Backdoor,77.76639462,82.17213154,5.555555597,9.259258956 +Cora,GCN,Without Backdoor,82.78688788,84.93852615,86.11111045,85.18518806 +Cora,GraphSAGE,With Backdoor,80.84016442,84.22130942,4.629629478,0.9259259328 +Cora,GraphSAGE,Without Backdoor,82.99180269,84.73360538,85.18518806,87.9629612 \ No newline at end of file diff --git a/results/CiteSeer_Table3.csv b/results/CiteSeer_Table3.csv new file mode 100644 index 0000000..a4cf971 --- /dev/null +++ b/results/CiteSeer_Table3.csv @@ -0,0 +1,8 @@ +Dataset,Model,Setting,TCA,ECA,TBA,EBA +CiteSeer,GCN,With Backdoor,64.77462649,68.53088737,20.30075192,9.022556245 +CiteSeer,GCN,With Backdoor,64.77462649,68.53088737,20.30075192,9.022556245 +CiteSeer,GCN,Without Backdoor,71.45242095,69.61602569,69.17293072,65.41353464 +CiteSeer,GAT,With Backdoor,68.11352372,67.19532609,28.57142985,8.270676434 +CiteSeer,GAT,Without Backdoor,72.37061858,69.44907904,70.67669034,69.92481351 +CiteSeer,GraphSAGE,With Backdoor,71.70283794,70.86811066,22.55639136,15.78947306 +CiteSeer,GraphSAGE,Without Backdoor,74.62437153,71.53589129,73.68420959,71.42857313 \ No newline at end of file diff --git a/results/table5_all_results.csv b/results/table5_all_results.csv new file mode 100644 index 0000000..be42d30 --- /dev/null +++ b/results/table5_all_results.csv @@ -0,0 +1,41 @@ +dataset,setting,mode,target_acc_mean,target_acc_std,indep_acc_mean,indep_acc_std,surr_acc_mean,surr_acc_std,surr_fid_mean,surr_fid_std,cown_acc_mean,cown_acc_std +Amazon,I,Inductive,74.72766936,2.433793066,75.99350429,2.807438253,66.12099587,3.767657614,84.28144935,2.742822875,93.75,8.625819492 +Amazon,I,Transductive,74.72766936,2.433793066,75.98846116,2.809416573,66.26724779,3.505791579,84.50839197,2.287750406,93.75,8.625819492 +Amazon,II,Inductive,74.21024069,2.936450688,75.722182,1.483742129,64.10977973,3.810170195,82.16029273,2.239979708,100,0 +Amazon,II,Transductive,74.21024069,2.936450688,75.72319065,1.48381853,64.13096107,3.76414472,82.13205106,1.9391017,100,0 +Amazon,III,Inductive,74.818445,2.776157246,76.05906568,1.315007683,48.65196113,3.906533014,61.48329745,5.397102683,100,0 +Amazon,III,Transductive,74.80936721,2.771182961,76.05503119,1.313337296,48.65196113,3.906533014,61.48934931,5.39143826,100,0 +Amazon,IV,Inductive,77.1060288,4.273100267,75.75849262,1.420200514,50.7035226,5.022975708,62.91454871,6.082537691,97.5,7.071067812 +Amazon,IV,Transductive,77.1060288,4.273100267,75.75748406,1.420005262,50.7035226,5.022975708,62.91454871,6.082537691,97.5,7.071067812 +CiteSeer,I,Inductive,71.77177369594574,1.0647533107843492,72.94377974338002,1.3912004966121483,73.57774749398232,1.1895509263973538,91.66249790125423,0.5610137603513385,91.66666666666667,8.908708063747481 +CiteSeer,I,Transductive,71.77177369594574,1.0647533107843492,72.94377974338002,1.3912004966121483,73.56940582,1.1983429173168936,91.65415623121791,0.5644055196687388,91.66666666666667,8.908708063747481 +CiteSeer,II,Inductive,72.10961133241653,1.1055630562462107,72.92292540272076,1.138527077192637,72.17634485827551,1.5939817451938052,91.49983541833029,0.9707933926959381,93.75,8.625819491779417 
+CiteSeer,II,Transductive,72.10961133241653,1.1055630562462107,72.92292540272076,1.138527077192637,72.18468652831183,1.585126582819718,91.51234792338477,0.9698406293019681,91.66666666666667,8.908708063747481 +CiteSeer,III,Inductive,71.84684872627258,0.9186045352785076,73.03970886601343,1.3594631797562717,51.82682887340585,2.433482699252227,60.47297349820534,2.528358882931918,100,0 +CiteSeer,III,Transductive,71.84684872627258,0.9186045352785076,73.03970886601343,1.3594631797562717,51.82682887340585,2.433482699252227,60.47297349820534,2.528358882931918,100,0 +CiteSeer,IV,Inductive,72.40991219878197,1.4574197263651163,72.95212108227942,1.1431189724073934,56.65040258318186,3.119829912164031,66.52277372777462,3.530106445182951,100,0 +CiteSeer,IV,Transductive,72.40991219878197,1.4574197263651163,72.95212108227942,1.1431189724073934,56.64414633065462,3.110560938860846,66.52902998030186,3.5384408830786906,100,0 +Cora,I,Inductive,86.07536629,0.364350057,85.08476276,0.729251325,86.01919835,0.728699547,95.40951807,1.552431607,95.83333333,7.715167498 +Cora,I,Transductive,86.07536629,0.364350057,85.08476276,0.729251325,86.01919835,0.728699547,95.40951807,1.552431607,95.83333333,7.715167498 +Cora,II,Inductive,86.16727814,0.67897562,85.20220543,0.684504016,85.75367555,0.713492315,95.37377523,0.875243012,91.66666667,8.908708064 +Cora,II,Transductive,86.16727814,0.67897562,85.20220543,0.684504016,85.75367555,0.713492315,95.37377523,0.875243012,91.66666667,8.908708064 +Cora,III,Inductive,85.98345444,0.603706665,85.05412497,0.95587684,59.26776932,2.043234332,62.53829622,2.596340859,100,0 +Cora,III,Transductive,85.98345444,0.603706665,85.05412497,0.95587684,59.26776932,2.043234332,62.53829622,2.596340859,100,0 +Cora,IV,Inductive,85.98345518,1.11923336,85.16646177,0.710374363,66.8734679,2.61199434,70.71844321,2.718749521,100,0 +Cora,IV,Transductive,85.98345518,1.11923336,85.16646177,0.710374363,66.8734679,2.61199434,70.71844321,2.718749521,100,0 +PubMed,I,Inductive,85.47160029411316,1.4055767825866161,86.07800669140286,1.2555878405802159,84.03622731566429,1.5159967661156328,95.70162635710504,0.48851841394759443,100,0 +PubMed,I,Transductive,85.47160029411316,1.4055767825866161,86.07871101962195,1.255610642018456,84.03622731566429,1.5159967661156328,95.70162635710504,0.48851841394759443,100,0 +PubMed,II,Inductive,85.43990552425385,1.5075765891313828,86.10547466410532,1.219525037818393,83.65519858,1.5021227600145137,94.75574476851358,0.5504532852567569,100,0 +PubMed,II,Transductive,85.43990552425385,1.5075765891313828,86.10547466410532,1.219525037818393,83.65449433525403,1.5014299316259458,94.75504044029448,0.5503271029803263,100,0 +PubMed,III,Inductive,85.45892238616943,1.4018459600203355,86.10899606,1.235588847369993,57.03177681813637,1.537385352822437,60.18424490466714,1.7685813325906397,100,0 +PubMed,III,Transductive,85.45892238616943,1.4018459600203355,86.10899606,1.235588847369993,57.03177681813637,1.537385352822437,60.18424490466714,1.7685813325906397,100,0 +PubMed,IV,Inductive,85.37651822,1.4541180433778989,86.08434523145358,1.2317357633422827,63.20359905560811,3.853679200667111,67.01529628,4.251243661477413,100,0 +PubMed,IV,Transductive,85.37651822,1.4541180433778989,86.08434523145358,1.2317357633422827,63.20359905560811,3.853679200667111,67.01529628,4.251243661477413,100,0 +dblp,I,Inductive,84.09667015075684,0.8904874959006944,83.11411357588239,0.6737062534471557,83.48208309875594,1.0747514260566795,94.39433357781834,0.5186943011733857,100,0 
+dblp,I,Transductive,84.09667015075684,0.8904874959006944,83.11411357588239,0.6737062534471557,83.48208309875594,1.0747514260566795,94.39433357781834,0.5186943011733857,100,0 +dblp,II,Inductive,84.15303975343704,0.8325283644192363,83.06322404079967,0.7391647255743868,84.17809307575226,0.7964256301882281,94.14928149845865,0.6040526812442378,100,0 +dblp,II,Transductive,84.15303975343704,0.8325283644192363,83.06165817711089,0.7366850732262857,84.17731018529997,0.7951250309841605,94.14849860800638,0.6041019644536465,100,0 +dblp,III,Inductive,84.12485420703888,0.8585653812645303,83.04521698090765,0.7541799999775749,63.776537527640656,5.409483281624359,67.23505876337488,6.011977794715171,100,0 +dblp,III,Transductive,84.12485420703888,0.8585653812645303,83.04756573504872,0.7529822086845728,63.777711925407246,5.410287503708998,67.23153569425146,6.014947800689849,100,0 +dblp,IV,Inductive,84.18826982,0.9237954497679515,83.08592844340536,0.6570379800683244,68.50572787225246,4.590483784023131,71.64364544053872,4.968114182274684,100,0 +dblp,IV,Transductive,84.18826982,0.9237954497679515,83.08514555295308,0.6573444249326462,68.50807654,4.593675097736268,71.64364544053872,4.968114182274684,100,0 diff --git a/results/table6_latest.csv b/results/table6_latest.csv new file mode 100644 index 0000000..dd3a70a --- /dev/null +++ b/results/table6_latest.csv @@ -0,0 +1,41 @@ +Dataset,Setting,Mode,Ori_ACC(%),FPR(%),FNR(%),Fine_ACC(%) +Amazon,I,Inductive,83.48,0,0,100 +Amazon,I,Transductive,88.27,0,0,100 +Amazon,II,Inductive,85.87,0,0,100 +Amazon,II,Transductive,86.23,0,0,100 +Amazon,III,Inductive,37.51,0,0,100 +Amazon,III,Transductive,37.51,0,0,100 +Amazon,IV,Inductive,37.51,0,0,100 +Amazon,IV,Transductive,37.51,0,0,100 +PubMed,I,Inductive,87.53,0,0,100 +PubMed,I,Transductive,87.44,0,0,100 +PubMed,II,Inductive,87.34,0,0,100 +PubMed,II,Transductive,87.34,0,0,100 +PubMed,III,Inductive,65.32,0,0,100 +PubMed,III,Transductive,65.26,0,0,100 +PubMed,IV,Inductive,83.32,0,0,100 +PubMed,IV,Transductive,83.32,0,0,100 +CiteSeer,I,Inductive,87.38,0,0,100 +CiteSeer,I,Transductive,87.92,0,0,100 +CiteSeer,II,Inductive,85.78,0,0,100 +CiteSeer,II,Transductive,85.81,0,0,100 +CiteSeer,III,Inductive,46.26,0,0,100 +CiteSeer,III,Transductive,46.26,0,0,100 +CiteSeer,IV,Inductive,71.6,0,0,100 +CiteSeer,IV,Transductive,71.6,0,0,100 +Cora,I,Inductive,94.68,0,0,100 +Cora,I,Transductive,94.28,0,0,100 +Cora,II,Inductive,93.57,0,0,100 +Cora,II,Transductive,93.57,0,0,100 +Cora,III,Inductive,43.69,0,0,100 +Cora,III,Transductive,43.69,0,0,100 +Cora,IV,Inductive,60.04,0,0,100 +Cora,IV,Transductive,59.27,0,0,100 +dblp,I,Inductive,87.83,0,0,100 +dblp,I,Transductive,87.62,0,0,100 +dblp,II,Inductive,88.68,0,0,100 +dblp,II,Transductive,88.62,0,0,100 +dblp,III,Inductive,57.05,0,0,100 +dblp,III,Transductive,57,0,0,100 +dblp,IV,Inductive,73.44,0,0,100 +dblp,IV,Transductive,73.44,0,0,100 diff --git a/results/table7.csv b/results/table7.csv new file mode 100644 index 0000000..7fbd4d6 --- /dev/null +++ b/results/table7.csv @@ -0,0 +1,41 @@ +Dataset,Setting,Mode,FPR +dblp,I,Inductive,33.33 ± 0.00 +dblp,I,Transductive,46.67 ± 16.33 +dblp,II,Inductive,60.00 ± 13.33 +dblp,II,Transductive,60.00 ± 13.33 +dblp,III,Inductive,40.00 ± 13.33 +dblp,III,Transductive,40.00 ± 13.33 +dblp,IV,Inductive,33.33 ± 0.00 +dblp,IV,Transductive,33.33 ± 0.00 +Amazon,I,Inductive,46.67 ± 16.33 +Amazon,I,Transductive,46.67 ± 16.33 +Amazon,II,Inductive,46.67 ± 16.33 +Amazon,II,Transductive,33.33 ± 0.00 +Amazon,III,Inductive,33.33 ± 0.00 +Amazon,III,Transductive,33.33 ± 0.00 
+Amazon,IV,Inductive,33.33 ± 0.00 +Amazon,IV,Transductive,33.33 ± 0.00 +PubMed,I,Inductive,40.00 ± 13.33 +PubMed,I,Transductive,40.00 ± 13.33 +PubMed,II,Inductive,60.00 ± 13.33 +PubMed,II,Transductive,60.00 ± 13.33 +PubMed,III,Inductive,60.00 ± 13.33 +PubMed,III,Transductive,60.00 ± 13.33 +PubMed,IV,Inductive,60.00 ± 13.33 +PubMed,IV,Transductive,60.00 ± 13.33 +CiteSeer,I,Inductive,33.33 ± 0.00 +CiteSeer,I,Transductive,33.33 ± 0.00 +CiteSeer,II,Inductive,33.33 ± 0.00 +CiteSeer,II,Transductive,33.33 ± 0.00 +CiteSeer,III,Inductive,53.33 ± 16.33 +CiteSeer,III,Transductive,53.33 ± 16.33 +CiteSeer,IV,Inductive,40.00 ± 13.33 +CiteSeer,IV,Transductive,40.00 ± 13.33 +Cora,I,Inductive,60.00 ± 13.33 +Cora,I,Transductive,60.00 ± 13.33 +Cora,II,Inductive,40.00 ± 13.33 +Cora,II,Transductive,40.00 ± 13.33 +Cora,III,Inductive,53.33 ± 16.33 +Cora,III,Transductive,53.33 ± 16.33 +Cora,IV,Inductive,53.33 ± 16.33 +Cora,IV,Transductive,53.33 ± 16.33 diff --git a/results/table8.csv b/results/table8.csv new file mode 100644 index 0000000..b9b25e8 --- /dev/null +++ b/results/table8.csv @@ -0,0 +1,41 @@ +Dataset,Setting,Mode,Ori_ACC(%),FPR(%),FNR(%),Double_ACC(%) +Amazon,I,Inductive,87.19,0.0,0.0,100.0 +Amazon,I,Transductive,87.4,0.0,0.0,100.0 +Amazon,II,Inductive,83.95,0.0,0.0,100.0 +Amazon,II,Transductive,83.97,0.0,0.0,100.0 +Amazon,III,Inductive,37.51,0.0,0.0,100.0 +Amazon,III,Transductive,37.51,0.0,0.0,100.0 +Amazon,IV,Inductive,37.51,0.0,0.0,100.0 +Amazon,IV,Transductive,37.51,0.0,0.0,100.0 +CiteSeer,I,Inductive,87.29,0.0,0.0,100.0 +CiteSeer,I,Transductive,87.29,0.0,0.0,100.0 +CiteSeer,II,Inductive,85.81,0.0,0.0,100.0 +CiteSeer,II,Transductive,85.81,0.0,0.0,100.0 +CiteSeer,III,Inductive,40.7,0.0,0.0,100.0 +CiteSeer,III,Transductive,42.02,0.0,0.0,100.0 +CiteSeer,IV,Inductive,49.23,0.0,0.0,100.0 +CiteSeer,IV,Transductive,49.23,0.0,0.0,100.0 +Cora,I,Inductive,94.05,0.0,0.0,100.0 +Cora,I,Transductive,94.35,0.0,0.0,100.0 +Cora,II,Inductive,93.17,0.0,0.0,100.0 +Cora,II,Transductive,93.17,0.0,0.0,100.0 +Cora,III,Inductive,30.21,0.0,0.0,100.0 +Cora,III,Transductive,30.21,0.0,0.0,100.0 +Cora,IV,Inductive,42.98,0.0,0.0,100.0 +Cora,IV,Transductive,42.98,0.0,0.0,100.0 +PubMed,I,Inductive,87.47,0.0,0.0,100.0 +PubMed,I,Transductive,87.47,0.0,0.0,100.0 +PubMed,II,Inductive,87.38,0.0,0.0,100.0 +PubMed,II,Transductive,87.38,0.0,0.0,100.0 +PubMed,III,Inductive,68.86,0.0,0.0,100.0 +PubMed,III,Transductive,68.86,0.0,0.0,100.0 +PubMed,IV,Inductive,67.85,0.0,0.0,100.0 +PubMed,IV,Transductive,67.85,0.0,0.0,100.0 +dblp,I,Inductive,88.02,0.0,0.0,100.0 +dblp,I,Transductive,87.99,0.0,0.0,100.0 +dblp,II,Inductive,89.05,0.0,0.0,100.0 +dblp,II,Transductive,89.02,0.0,0.0,100.0 +dblp,III,Inductive,52.08,0.0,0.0,100.0 +dblp,III,Transductive,52.08,0.0,0.0,100.0 +dblp,IV,Inductive,77.62,0.0,0.0,100.0 +dblp,IV,Transductive,77.6,0.0,0.0,100.0 From 8cd93a0383e042d8e9db996c4f08903ccbf1bf74 Mon Sep 17 00:00:00 2001 From: Iqra171 Date: Wed, 8 Oct 2025 23:20:31 +0500 Subject: [PATCH 10/22] Delete pygip/src directory --- pygip/src/custom_attack.py | 198 ------------------------------------ pygip/src/custom_defense.py | 170 ------------------------------- 2 files changed, 368 deletions(-) delete mode 100644 pygip/src/custom_attack.py delete mode 100644 pygip/src/custom_defense.py diff --git a/pygip/src/custom_attack.py b/pygip/src/custom_attack.py deleted file mode 100644 index 1c2d7d3..0000000 --- a/pygip/src/custom_attack.py +++ /dev/null @@ -1,198 +0,0 @@ -# src/custom_attack.py -import os -import random -from typing import Optional, Union - -import 
torch -import torch.nn.functional as F - -from src.dataset import Dataset -from src.attacks import BaseAttack -from src.models import GraphSAGE, GCN -from src.train_target import train_masked_target - - -def evaluate_model(model: torch.nn.Module, data, device: torch.device): - model.eval() - data = data.to(device) - with torch.no_grad(): - logits = model(data.x, data.edge_index) - preds = logits.argmax(dim=1) - mask = getattr(data, "test_mask", None) or getattr(data, "val_mask", None) - if mask is None: - mask = torch.ones(data.num_nodes, dtype=torch.bool, device=device) - return (preds[mask] == data.y[mask]).float().mean().item() - - -class FeatureFlipAttack(BaseAttack): - """ - Custom attack that perturbs node features for a fraction of nodes. - Conforms to the PyGIP BaseAttack API: - - attack() - - _load_model() - - _train_target_model() - - _train_attack_model() - """ - - supported_api_types = {"pyg"} - supported_datasets = set() - - def __init__(self, dataset: Dataset, attack_node_fraction: float, model_path: str = None, - device: Optional[Union[str, torch.device]] = None): - # must call super() so BaseAttack sets self.device and graph fields - super().__init__(dataset, attack_node_fraction, model_path, device) - - if not (0.0 < self.attack_node_fraction <= 1.0): - raise ValueError("attack_node_fraction must be in (0,1].") - - def _seed(self, seed: int = 0): - random.seed(seed) - torch.manual_seed(seed) - if torch.cuda.is_available(): - torch.cuda.manual_seed_all(seed) - - def _load_model(self): - """ - Try to load a model checkpoint at self.model_path. - Expected checkpoint formats: - - state_dict or - - dict with 'state_dict' / 'model_state' and optional 'model_type' metadata - Returns a model instance moved to self.device or None if no model_path provided. - """ - if not self.model_path: - return None - if not os.path.exists(self.model_path): - print(f"[FeatureFlipAttack] model_path {self.model_path} not found.") - return None - - checkpoint = torch.load(self.model_path, map_location=self.device) - state = checkpoint.get("state_dict", checkpoint) if isinstance(checkpoint, dict) else checkpoint - model_type = checkpoint.get("model_type", None) if isinstance(checkpoint, dict) else None - - # minimal heuristic for model_type - if model_type is None and isinstance(state, dict): - keys = list(state.keys()) - if any("lin_l" in k or "lin_r" in k for k in keys): - model_type = "GraphSAGE" - else: - model_type = "GCN" - - # instantiate appropriate model - in_dim = self.num_features - out_dim = self.num_classes - hid = checkpoint.get("hidden", 64) if isinstance(checkpoint, dict) else 64 - - if model_type == "GraphSAGE": - model = GraphSAGE(in_dim, out_dim, hidden=hid).to(self.device) - else: - model = GCN(in_dim, out_dim, hidden=hid).to(self.device) - - try: - # try multiple common keys - if "state_dict" in checkpoint: - state_dict = checkpoint["state_dict"] - elif "model_state" in checkpoint: - state_dict = checkpoint["model_state"] - else: - state_dict = state - model.load_state_dict(state_dict, strict=False) - model.eval() - return model - except Exception as e: - print(f"[FeatureFlipAttack] Failed to load weights: {e}") - return None - - def _train_target_model(self, train_epochs: int = 100, lr: float = 1e-2, seed: int = 0): - """ - Train a victim model (GraphSAGE) on the clean dataset and return the trained model. - Uses self.graph_data and self.device. 
- """ - self._seed(seed) - data = self.graph_data.to(self.device) - model = GraphSAGE(self.num_features, self.num_classes, hidden=64).to(self.device) - opt = torch.optim.Adam(model.parameters(), lr=lr) - - model.train() - for _ in range(train_epochs): - opt.zero_grad() - logits = model(data.x, data.edge_index) - loss = F.cross_entropy(logits[data.train_mask], data.y[data.train_mask]) - loss.backward() - opt.step() - - model.eval() - return model - - def _train_attack_model(self, dataset_name: str = "Cora", mask_ratio: float = 0.12, - hidden: int = 64, epochs: int = 200, seed: int = 0): - """ - Train a surrogate/attack model using the existing train_masked_target routine. - Returns path to checkpoint saved by train_masked_target. - """ - # train_masked_target will handle device selection via its arguments - ckpt_path = train_masked_target(dataset_name=dataset_name, - mask_ratio=mask_ratio, - hidden=hidden, - epochs=epochs, - seed=seed, - device=self.device) - return ckpt_path - - def _perturb_features(self, data, fraction: float): - """Return a clone of data with fraction of node features replaced by Gaussian noise.""" - pert = data.clone() - num_nodes = pert.num_nodes - k = max(1, int(fraction * num_nodes)) - idx = torch.randperm(num_nodes)[:k] - noise = torch.randn((k, pert.x.size(1)), device=pert.x.device) * 0.5 - pert.x[idx] = noise - return pert, k - - def attack(self, retrain_target: bool = True, retrain_epochs: int = 50, seed: int = 0): - """ - Main attack logic: - 1) Load or train a target model - 2) Evaluate baseline - 3) Perturb features on a fraction of nodes - 4) Optionally retrain a model on perturbed data and evaluate - Returns dict with metrics. - """ - self._seed(seed) - - if self.graph_data is None: - raise RuntimeError("No graph_data available in dataset.") - - # Step 1: get target model - model = self._load_model() or (self._train_target_model(train_epochs=retrain_epochs, seed=seed) if retrain_target else None) - if model is None: - model = self._train_target_model(train_epochs=retrain_epochs, seed=seed) - - # Step 2: eval before - acc_before = evaluate_model(model, self.graph_data, self.device) - - # Step 3: perturb features - perturbed_data, num_perturbed = self._perturb_features(self.graph_data, self.attack_node_fraction) - - # Step 4: retrain and evaluate on perturbed graph - model_pert = GraphSAGE(self.num_features, self.num_classes, hidden=64).to(self.device) - opt = torch.optim.Adam(model_pert.parameters(), lr=1e-2) - perturbed_data = perturbed_data.to(self.device) - - model_pert.train() - for _ in range(retrain_epochs): - opt.zero_grad() - logits = model_pert(perturbed_data.x, perturbed_data.edge_index) - loss = F.cross_entropy(logits[perturbed_data.train_mask], perturbed_data.y[perturbed_data.train_mask]) - loss.backward() - opt.step() - - acc_after = evaluate_model(model_pert, perturbed_data, self.device) - - results = { - "attack_name": "FeatureFlipAttack", - "attack_fraction": self.attack_node_fraction, - "num_perturbed": num_perturbed, - "acc_before": acc_before, - "acc_after": acc_after, - } - return results diff --git a/pygip/src/custom_defense.py b/pygip/src/custom_defense.py deleted file mode 100644 index 46dca0e..0000000 --- a/pygip/src/custom_defense.py +++ /dev/null @@ -1,170 +0,0 @@ -# src/custom_defense.py -import torch -import torch.nn.functional as F - -from src.dataset import Dataset -from src.defenses import BaseDefense -from src.models import GraphSAGE -from src.train_target import train_masked_target - - -def evaluate_model(model: 
torch.nn.Module, data, device: torch.device): - model.eval() - data = data.to(device) - with torch.no_grad(): - logits = model(data.x, data.edge_index) - preds = logits.argmax(dim=1) - mask = getattr(data, "test_mask", None) or getattr(data, "val_mask", None) - if mask is None: - mask = torch.ones(data.num_nodes, dtype=torch.bool, device=device) - return (preds[mask] == data.y[mask]).float().mean().item() - - -class NeighborSmoothingDefense(BaseDefense): - """ - Defense that smooths features by neighbor averaging and retrains a model. - Implements required BaseDefense hooks: - - defend() - - _load_model() - - _train_target_model() - - _train_defense_model() - - _train_surrogate_model() - """ - - supported_api_types = {"pyg"} - supported_datasets = set() - - def __init__(self, dataset: Dataset, attack_node_fraction: float, device: Optional[torch.device] = None): - super().__init__(dataset, attack_node_fraction, device) - - @staticmethod - def smooth_features(data): - row, col = data.edge_index - acc = torch.zeros_like(data.x) - deg = torch.zeros(data.num_nodes, device=data.x.device) - acc.index_add_(0, row, data.x[col]) - deg.index_add_(0, row, torch.ones(col.size(0), device=data.x.device)) - deg = deg.clamp(min=1.0).unsqueeze(1) - return acc / deg - - def _load_model(self): - """ - Attempt to load a model from self.model_path (optional). - Mirrors _load_model style from attack. - """ - if not getattr(self, "model_path", None): - return None - if not os.path.exists(self.model_path): - print(f"[NeighborSmoothingDefense] model_path {self.model_path} not found.") - return None - - checkpoint = torch.load(self.model_path, map_location=self.device) - state = checkpoint.get("state_dict", checkpoint) if isinstance(checkpoint, dict) else checkpoint - model_type = checkpoint.get("model_type", None) if isinstance(checkpoint, dict) else None - - if model_type is None and isinstance(state, dict): - keys = list(state.keys()) - if any("lin_l" in k or "lin_r" in k for k in keys): - model_type = "GraphSAGE" - else: - model_type = "GCN" - - in_dim = self.num_features - out_dim = self.num_classes - hid = checkpoint.get("hidden", 64) if isinstance(checkpoint, dict) else 64 - - if model_type == "GraphSAGE": - model = GraphSAGE(in_dim, out_dim, hidden=hid).to(self.device) - else: - model = GraphSAGE(in_dim, out_dim, hidden=hid).to(self.device) - - try: - if "state_dict" in checkpoint: - state_dict = checkpoint["state_dict"] - elif "model_state" in checkpoint: - state_dict = checkpoint["model_state"] - else: - state_dict = state - model.load_state_dict(state_dict, strict=False) - model.eval() - return model - except Exception as e: - print(f"[NeighborSmoothingDefense] load failed: {e}") - return None - - def _train_target_model(self, data, epochs: int = 50, lr: float = 1e-2, seed: int = 0): - """ - Train a standard target model on the provided data and return it. - This matches the framework hook signature: accepts data and returns model. 
- """ - torch.manual_seed(seed) - data = data.to(self.device) - model = GraphSAGE(self.num_features, self.num_classes, hidden=64).to(self.device) - opt = torch.optim.Adam(model.parameters(), lr=lr) - - model.train() - for _ in range(epochs): - opt.zero_grad() - logits = model(data.x, data.edge_index) - loss = F.cross_entropy(logits[data.train_mask], data.y[data.train_mask]) - loss.backward() - opt.step() - - model.eval() - return model - - def _train_defense_model(self, data, epochs: int = 50, lr: float = 1e-2, seed: int = 0): - """ - Train model using defense-prepared data (e.g., smoothed features). - """ - # Behavior mirrors _train_target_model; kept separate for clarity - return self._train_target_model(data, epochs=epochs, lr=lr, seed=seed) - - def _train_surrogate_model(self, dataset_name: str = "Cora", mask_ratio: float = 0.12, - hidden: int = 64, epochs: int = 200, seed: int = 0): - """ - Train a surrogate (attacker's) model using the provided train_masked_target helper. - Returns path to saved checkpoint. - """ - ckpt_path = train_masked_target(dataset_name=dataset_name, - mask_ratio=mask_ratio, - hidden=hidden, - epochs=epochs, - seed=seed, - device=self.device) - return ckpt_path - - def defend(self, retrain_epochs: int = 50, seed: int = 0): - """ - Defense workflow: - 1) Train baseline target - 2) Train surrogate (optional) - returns ckpt path for analysis - 3) Apply smoothing and train defense model - 4) Return metrics dictionary - """ - if self.graph_data is None: - raise RuntimeError("No graph_data available in dataset.") - - # Baseline target (trained on original data) - baseline_model = self._train_target_model(self.graph_data, epochs=retrain_epochs, seed=seed) - acc_baseline = evaluate_model(baseline_model, self.graph_data, self.device) - - # Optionally train surrogate for evaluation/debug (not used directly here) - # surrogate_ckpt = self._train_surrogate_model(dataset_name=self.dataset.dataset_name, - # mask_ratio=0.12, hidden=64, epochs=200, seed=seed) - - # Apply smoothing to features - smoothed = self.graph_data.clone() - smoothed.x = self.smooth_features(smoothed) - - # Train defense model on smoothed features - defense_model = self._train_defense_model(smoothed, epochs=max(10, retrain_epochs // 2), seed=seed) - acc_defended = evaluate_model(defense_model, smoothed, self.device) - - results = { - "defense_name": "NeighborSmoothingDefense", - "acc_baseline": acc_baseline, - "acc_defended": acc_defended, - "attack_node_fraction": self.attack_node_fraction, - } - return results From 7569c5fb2212752a0d3616bd18a010e6747e93aa Mon Sep 17 00:00:00 2001 From: Iqra171 Date: Wed, 8 Oct 2025 23:33:55 +0500 Subject: [PATCH 11/22] Delete examples/run_custom_attack.py --- examples/run_custom_attack.py | 36 ----------------------------------- 1 file changed, 36 deletions(-) delete mode 100644 examples/run_custom_attack.py diff --git a/examples/run_custom_attack.py b/examples/run_custom_attack.py deleted file mode 100644 index 52088f5..0000000 --- a/examples/run_custom_attack.py +++ /dev/null @@ -1,36 +0,0 @@ -# # examples/run_custom_attack.py -# import argparse -# from pygip.datasets.datasets import Dataset -# from pygip.src.custom_attack import FeatureFlipAttack - -# def main(): -# parser = argparse.ArgumentParser() -# parser.add_argument("--dataset", type=str, default="Cora") -# parser.add_argument("--fraction", type=float, default=0.1) -# parser.add_argument("--device", type=str, default=None) -# args = parser.parse_args() - -# dataset = Dataset(api_type="pyg", path="./data") -# 
attack = FeatureFlipAttack(dataset, attack_node_fraction=args.fraction, model_path=None, device=args.device) -# results = attack.attack(retrain_target=True, retrain_epochs=50, seed=0) -# print("Attack results:", results) - -# if __name__ == "__main__": -# main() -import sys -sys.path.insert(0, '/content/PyGIP') - -# Import the modules -from pygip.datasets.datasets import Dataset -from pygip.src.custom_attack import FeatureFlipAttack - -# Set parameters directly instead of using argparse -dataset_name = "Cora" -fraction = 0.25 -device = None - -# Run the attack -dataset = Dataset(api_type="pyg", path="./data") -attack = FeatureFlipAttack(dataset, attack_node_fraction=fraction, model_path=None, device=device) -results = attack.attack(retrain_target=True, retrain_epochs=50, seed=0) -print("Attack results:", results) From c9ef377053bc4bd77d21226a85d7b14185aa8b8d Mon Sep 17 00:00:00 2001 From: Iqra171 Date: Wed, 8 Oct 2025 23:34:10 +0500 Subject: [PATCH 12/22] Delete examples/run_custom_defense.py --- examples/run_custom_defense.py | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 examples/run_custom_defense.py diff --git a/examples/run_custom_defense.py b/examples/run_custom_defense.py deleted file mode 100644 index 237fc4d..0000000 --- a/examples/run_custom_defense.py +++ /dev/null @@ -1,19 +0,0 @@ -# examples/run_custom_defense.py -import argparse -from pygip.datasets.datasets import Dataset -from src.custom_defense import NeighborSmoothingDefense - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--dataset", type=str, default="Cora") - parser.add_argument("--fraction", type=float, default=0.1) - parser.add_argument("--device", type=str, default=None) - args = parser.parse_args() - - dataset = Dataset(api_type="pyg", path="./data") - defense = NeighborSmoothingDefense(dataset, attack_node_fraction=args.fraction, device=args.device) - results = defense.defend(retrain_epochs=50, seed=0) - print("Defense results:", results) - -if __name__ == "__main__": - main() From f9bcb29a92e55777a90261814a3fbb97529de4cd Mon Sep 17 00:00:00 2001 From: Iqra171 Date: Wed, 8 Oct 2025 23:52:46 +0500 Subject: [PATCH 13/22] Create pyg_backbones.py --- pygip/models/nn/pyg_backbones.py | 96 ++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 pygip/models/nn/pyg_backbones.py diff --git a/pygip/models/nn/pyg_backbones.py b/pygip/models/nn/pyg_backbones.py new file mode 100644 index 0000000..bad65ec --- /dev/null +++ b/pygip/models/nn/pyg_backbones.py @@ -0,0 +1,96 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch_geometric.nn import GCNConv, SAGEConv, GATConv, GINConv, SGConv + + + +# ---------------------------- +# GCN +# ---------------------------- +class GCN(nn.Module): + def __init__(self, in_channels, out_channels, hidden=64, num_layers=2): + super().__init__() + self.convs = nn.ModuleList([GCNConv(in_channels, hidden)]) + for _ in range(num_layers - 2): + self.convs.append(GCNConv(hidden, hidden)) + self.convs.append(GCNConv(hidden, out_channels)) + + def forward(self, x, edge_index): + for conv in self.convs[:-1]: + x = F.relu(conv(x, edge_index)) + return self.convs[-1](x, edge_index) + +# ---------------------------- +# GraphSAGE +# ---------------------------- +class GraphSAGE(nn.Module): + def __init__(self, in_channels, out_channels, hidden=64, num_layers=2): + super().__init__() + self.convs = nn.ModuleList([SAGEConv(in_channels, hidden)]) + for _ in range(num_layers - 2): + 
self.convs.append(SAGEConv(hidden, hidden))
+        self.convs.append(SAGEConv(hidden, out_channels))
+
+    def forward(self, x, edge_index):
+        for conv in self.convs[:-1]:
+            x = F.relu(conv(x, edge_index))
+        return self.convs[-1](x, edge_index)
+
+# ----------------------------
+# GAT
+# ----------------------------
+class GAT(nn.Module):
+    def __init__(self, in_channels, out_channels, hidden=64, num_layers=2, heads=4):
+        super().__init__()
+        self.convs = nn.ModuleList([GATConv(in_channels, hidden, heads=heads)])
+        for _ in range(num_layers - 2):
+            self.convs.append(GATConv(hidden * heads, hidden, heads=heads))
+        self.convs.append(GATConv(hidden * heads, out_channels, heads=1))
+
+    def forward(self, x, edge_index):
+        for conv in self.convs[:-1]:
+            x = F.elu(conv(x, edge_index))
+        return self.convs[-1](x, edge_index)
+
+# ----------------------------
+# GIN
+# ----------------------------
+class GIN(nn.Module):
+    def __init__(self, in_dim, out_dim, hid_dim=64, num_layers=2):
+        super().__init__()
+        nn1 = nn.Sequential(
+            nn.Linear(in_dim, hid_dim),
+            nn.ReLU(),
+            nn.Linear(hid_dim, hid_dim)
+        )
+        self.convs = nn.ModuleList([GINConv(nn1)])
+        for _ in range(num_layers - 2):
+            nnk = nn.Sequential(
+                nn.Linear(hid_dim, hid_dim),
+                nn.ReLU(),
+                nn.Linear(hid_dim, hid_dim)
+            )
+            self.convs.append(GINConv(nnk))
+        nn_last = nn.Sequential(
+            nn.Linear(hid_dim, hid_dim),
+            nn.ReLU(),
+            nn.Linear(hid_dim, out_dim)
+        )
+        self.convs.append(GINConv(nn_last))
+
+    def forward(self, x, edge_index):
+        for conv in self.convs[:-1]:
+            x = F.relu(conv(x, edge_index))
+        return self.convs[-1](x, edge_index)
+
+# ----------------------------
+# SGC
+# ----------------------------
+class SGC(nn.Module):
+    def __init__(self, in_dim, out_dim, K=2):
+        super().__init__()
+        self.conv = SGConv(in_dim, out_dim, K=K)
+
+    def forward(self, x, edge_index):
+        return self.conv(x, edge_index)

From 4f80a7e00a96ec6d21d3b7631c6acd7c74ef3b3b Mon Sep 17 00:00:00 2001
From: Iqra171
Date: Wed, 8 Oct 2025 23:53:55 +0500
Subject: [PATCH 14/22] Add files via upload

---
 pygip/datasets/pyg_datasets.py | 181 +++++++++++++++++++++++++++++++++
 1 file changed, 181 insertions(+)
 create mode 100644 pygip/datasets/pyg_datasets.py

diff --git a/pygip/datasets/pyg_datasets.py b/pygip/datasets/pyg_datasets.py
new file mode 100644
index 0000000..a875558
--- /dev/null
+++ b/pygip/datasets/pyg_datasets.py
@@ -0,0 +1,181 @@
+import torch
+import torch.nn.functional as F
+from torch_geometric.nn import GCNConv  # for GCN models
+# PyG loaders whose names collide with the wrapper classes below are
+# aliased; otherwise e.g. Amazon.__init__ would shadow the loader and
+# call itself recursively.
+from torch_geometric.datasets import (
+    Planetoid,
+    Amazon as AmazonPyG,
+    Coauthor,
+    Flickr as FlickrPyG,
+    Reddit as RedditPyG,
+    TUDataset,
+    FacebookPagePage,
+    LastFMAsia,
+    PolBlogs as PolBlogsPyG,
+    CitationFull,  # provides the homogeneous DBLP citation graph
+)
+
+# ----------------------------
+# Base Dataset Wrapper
+# ----------------------------
+class BasePyGDataset:
+    def __init__(self, dataset, data):
+        self.graph_dataset = dataset
+        self.graph_data = data
+        self.num_nodes = data.num_nodes
+        self.num_features = dataset.num_node_features
+        self.num_classes = dataset.num_classes
+
+    def _generate_masks_by_classes(self, num_class_samples=100, val_count=500, test_count=1000, seed=42):
+        """Generate train/val/test masks by selecting fixed number of nodes per class."""
+        num_nodes = self.graph_data.num_nodes
+        labels = self.graph_data.y
+        num_classes = int(labels.max().item()) + 1
+
+        used_mask = torch.zeros(num_nodes, dtype=torch.bool)
+        generator = torch.Generator().manual_seed(seed)
+        train_idx_parts = []
+
+        # train set
+        for c in range(num_classes):
+            class_idx = (labels == c).nonzero(as_tuple=True)[0]
+            if class_idx.numel() == 0:
+                continue
+            perm = class_idx[torch.randperm(class_idx.size(0), generator=generator)]
+            n_select = min(num_class_samples, perm.size(0))
+            selected = perm[:n_select]
+            train_idx_parts.append(selected)
+            used_mask[selected] = True
+
+        if len(train_idx_parts) == 0:
+            raise ValueError("No training samples available.")
+
+        train_idx = torch.cat(train_idx_parts, dim=0)
+
+        # val set
+        remaining_idx = (~used_mask).nonzero(as_tuple=True)[0]
+        remaining_perm = remaining_idx[torch.randperm(remaining_idx.size(0), generator=generator)]
+        val_take = min(val_count, remaining_perm.size(0))
+        val_idx = remaining_perm[:val_take]
+        used_mask[val_idx] = True
+
+        # test set
+        remaining_idx = (~used_mask).nonzero(as_tuple=True)[0]
+        test_take = min(test_count, remaining_idx.size(0))
+        test_idx = remaining_idx[:test_take]
+
+        self.graph_data.train_mask = self._index_to_mask(train_idx, num_nodes)
+        self.graph_data.val_mask = self._index_to_mask(val_idx, num_nodes)
+        self.graph_data.test_mask = self._index_to_mask(test_idx, num_nodes)
+
+    def _index_to_mask(self, index: torch.Tensor, size: int):
+        mask = torch.zeros(size, dtype=torch.bool)
+        mask[index] = True
+        return mask
+
+# ----------------------------
+# Datasets
+# ----------------------------
+class Cora(BasePyGDataset):
+    def __init__(self, path="./data"):
+        dataset = Planetoid(root=path, name="Cora")
+        super().__init__(dataset, dataset[0])
+        self.api_type = "pyg"  # required for CustomAttack
+
+
+class CiteSeer(BasePyGDataset):
+    def __init__(self, path="./data"):
+        dataset = Planetoid(root=path, name="CiteSeer")
+        super().__init__(dataset, dataset[0])
+        self.api_type = "pyg"
+
+
+class PubMed(BasePyGDataset):
+    def __init__(self, path="./data"):
+        dataset = Planetoid(root=path, name="PubMed")
+        super().__init__(dataset, dataset[0])
+        self.api_type = "pyg"
+
+
+class DBLP(BasePyGDataset):
+    def __init__(self, path="./data"):
+        # CitationFull serves the homogeneous DBLP citation graph used
+        # elsewhere in this repo; torch_geometric.datasets.DBLP is a
+        # heterogeneous dataset and takes no `name` argument.
+        dataset = CitationFull(root=path, name="DBLP")
+        super().__init__(dataset, dataset[0])
+        self._generate_masks_by_classes()
+        self.api_type = "pyg"
+
+
+class Amazon(BasePyGDataset):
+    def __init__(self, path="./data"):
+        dataset = AmazonPyG(root=path, name="Computers")
+        super().__init__(dataset, dataset[0])
+        self._generate_masks_by_classes()
+        self.api_type = "pyg"
+
+
+class Photo(BasePyGDataset):
+    def __init__(self, path="./data"):
+        dataset = AmazonPyG(root=path, name="Photo")
+        super().__init__(dataset, dataset[0])
+        self._generate_masks_by_classes()
+        self.api_type = "pyg"
+
+
+class CoauthorCS(BasePyGDataset):
+    def __init__(self, path="./data"):
+        dataset = Coauthor(root=path, name="CS")
+        super().__init__(dataset, dataset[0])
+        self._generate_masks_by_classes()
+        self.api_type = "pyg"
+
+
+class CoauthorPhysics(BasePyGDataset):
+    def __init__(self, path="./data"):
+        dataset = Coauthor(root=path, name="Physics")
+        super().__init__(dataset, dataset[0])
+        self._generate_masks_by_classes()
+        self.api_type = "pyg"
+
+
+class ENZYMES(BasePyGDataset):
+    def __init__(self, path="./data"):
+        dataset = TUDataset(root=path, name="ENZYMES")
+        super().__init__(dataset, dataset[0])
+        self.api_type = "pyg"
+
+
+class Facebook(BasePyGDataset):
+    def __init__(self, path="./data"):
+        dataset = FacebookPagePage(root=path)
+        super().__init__(dataset, dataset[0])
+        self.api_type = "pyg"
+
+
+class Flickr(BasePyGDataset):
+    def __init__(self, path="./data"):
+        dataset = FlickrPyG(root=path)
+        super().__init__(dataset, dataset[0])
+        self.api_type = "pyg"
+
+
+class PolBlogs(BasePyGDataset):
+    def __init__(self, path="./data"):
+        dataset = PolBlogsPyG(root=path)
+        super().__init__(dataset, dataset[0])
+        self._generate_masks_by_classes()
+        self.api_type = "pyg"
+
+
+class LastFM(BasePyGDataset):
+    def __init__(self, path="./data"):
+        dataset = LastFMAsia(root=path)
+        super().__init__(dataset, dataset[0])
+        self.api_type = "pyg"
+
+
+class Reddit(BasePyGDataset):
+    def __init__(self, path="./data"):
+        dataset = RedditPyG(root=path)
+        super().__init__(dataset, dataset[0])
+        self.api_type = "pyg"

From 4779fedc39beeb93ff71589a4412dfc77756f2d9 Mon Sep 17 00:00:00 2001
From: Iqra171
Date: Fri, 17 Oct 2025 00:51:09 +0500
Subject: [PATCH 15/22] Delete examples/run_table5.py

---
 examples/run_table5.py | 288 -----------------------------------------
 1 file changed, 288 deletions(-)
 delete mode 100644 examples/run_table5.py

diff --git a/examples/run_table5.py b/examples/run_table5.py
deleted file mode 100644
index f251272..0000000
--- a/examples/run_table5.py
+++ /dev/null
@@ -1,288 +0,0 @@
-# run_table5_full.py
-# Rewritten to reproduce Figure 3 & Table 5 from Zhou et al. (2024) with aggregation + stability fixes
-
-import os, random, numpy as np, pandas as pd, sys
-import torch, torch.nn as nn, torch.nn.functional as F
-from torch_geometric.datasets import Planetoid, Amazon, CitationFull
-from torch_geometric.data import Data
-from sklearn.model_selection import train_test_split
-import matplotlib.pyplot as plt
-
-
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from pygip.models.nn.pyg_backbones import GCN, GAT, GraphSAGE, GIN, SGC
-
-# ----------------------------
-# Config
-# ----------------------------
-SEEDS = [0, 1]
-NUM_INDEP = 3   # fewer independent models
-NUM_SURR = 3    # fewer surrogates
-MODEL_TRAIN_EPOCHS = 40
-SURR_TRAIN_EPOCHS = 40
-COWN_TRAIN_EPOCHS = 20
-MASK_RATIOS = [0.0, 0.1, 0.2, 0.4]
-
-
-# ----------------------------
-# Helpers
-# ----------------------------
-def set_seed(seed=0):
-    random.seed(seed); np.random.seed(seed)
-    torch.manual_seed(seed)
-    if torch.cuda.is_available():
-        torch.cuda.manual_seed_all(seed)
-    torch.backends.cudnn.deterministic = True
-    torch.backends.cudnn.benchmark = False
-
-def load_dataset(name, device="cpu"):
-    lname = name.lower()
-    if lname in [ "pubmed","cora","citeseer"]:
-        dataset = Planetoid(root=f"data/{name}", name=name)
-        data = dataset[0].to(device)
-    elif "amazon" in lname:
-        sub = "Photo" if "photo" in lname else "Computers"
-        dataset = Amazon(root=f"data/{lname}", name=sub)
-        data = dataset[0].to(device)
-    elif lname in ["dblp","db_lp","db-lp"]:
-        dataset = CitationFull(root="data/dblp", name="dblp")
-        data = dataset[0].to(device)
-    else:
-        raise ValueError(f"Unknown dataset {name}")
-    return data, dataset
-
-def split_nodes(num_nodes, ratios=(0.3,0.3,0.3,0.1), seed=0):
-    rng = np.random.RandomState(seed)
-    perm = rng.permutation(num_nodes)
-    sizes = [int(r*num_nodes) for r in ratios]
-    sizes[-1] = num_nodes - sum(sizes[:-1])
-    splits, names, start = {}, ["train","dshadow","dsurr","dtest"], 0
-    for name, sz in zip(names, sizes):
-        idx = perm[start:start+sz]
-        mask = torch.zeros(num_nodes, dtype=torch.bool); mask[idx] = True
-        splits[name] = mask; start += sz
-    return splits
-
-def filter_edges_to_mask(data, mask):
-    ei = data.edge_index; mask = mask.to(ei.device)
-    keep = ((mask[ei[0]] == True) & (mask[ei[1]] == True))
-    return ei[:, keep]
-
-def mask_features_global(data, mask_ratio=0.1, seed=0):
-    x = data.x.clone(); num_feats = x.size(1)
-    k = max(1, int(mask_ratio * num_feats))
-    rng = np.random.RandomState(seed)
-    feat_idx = 
rng.choice(num_feats, k, replace=False) - x[:, feat_idx] = 0.0 - data2 = Data(x=x, edge_index=data.edge_index.clone(), y=data.y.clone()) - return data2, feat_idx - -# ---------------------------- -# Models & Training -# ---------------------------- -def build_model(model_type, in_dim, out_dim, layers=2): - cls_map = {"GCN": GCN, "GraphSAGE": GraphSAGE, "GAT": GAT, "GIN": GIN, "SGC": SGC} - cls = cls_map[model_type] - try: - return cls(in_channels=in_dim, out_channels=out_dim, num_layers=layers) - except TypeError: - return cls(in_dim, out_dim, layers) - -def train_model(model, data, train_mask, epochs=200, lr=0.01, device="cpu"): - model = model.to(device); data = data.to(device) - opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4) - for _ in range(epochs): - model.train(); opt.zero_grad() - out = model(data.x, data.edge_index) - loss = F.cross_entropy(out[train_mask], data.y[train_mask]) - loss.backward(); opt.step() - return model - -def train_with_soft_labels(model, data, train_mask, soft_targets, epochs=200, lr=0.01, device="cpu"): - model = model.to(device); data = data.to(device) - soft_targets = soft_targets.to(device) - opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4) - for _ in range(epochs): - model.train(); opt.zero_grad() - out = F.log_softmax(model(data.x, data.edge_index), dim=1) - loss = F.kl_div(out[train_mask], soft_targets[train_mask], reduction='batchmean') - loss.backward(); opt.step() - return model - -def compute_accuracy(model, data, mask): - model.eval() - with torch.no_grad(): - logits = model(data.x, data.edge_index) - pred = logits.argmax(dim=1) - return (pred[mask] == data.y[mask]).float().mean().item() * 100 - -def compute_fidelity(model, target, data, mask): - model.eval(); target.eval() - with torch.no_grad(): - pred_m = model(data.x, data.edge_index).argmax(dim=1) - pred_t = target(data.x, data.edge_index).argmax(dim=1) - return (pred_m[mask] == pred_t[mask]).float().mean().item() * 100 - -# ---------------------------- -# Holistic vectors & C_own -# ---------------------------- -def model_to_vector_probs(model, data, node_order=None): - model.eval() - with torch.no_grad(): - probs = F.softmax(model(data.x, data.edge_index), dim=1).cpu() - if node_order is None: - node_order = torch.arange(probs.size(0)) - return probs[node_order].reshape(-1).numpy() - -class COwn(nn.Module): - def __init__(self, input_dim): - super().__init__() - self.net = nn.Sequential( - nn.Linear(input_dim, 128), nn.ReLU(), - nn.Linear(128, 64), nn.ReLU(), - nn.Linear(64, 2) - ) - def forward(self, x): return self.net(x) - -# ---------------------------- -# Settings mapping (I–IV) -# ---------------------------- -def get_setting_architectures(setting): - overlapping, disjoint = ["GCN","GAT","GraphSAGE"], ["GIN","SGC"] - l_same, l_diff = 2, 3 - if setting == "I": Fs, Find, lFs, lFind = overlapping, overlapping, l_same, l_same - elif setting == "II": Fs, Find, lFs, lFind = overlapping, overlapping, l_diff, l_same - elif setting == "III": Fs, Find, lFs, lFind = disjoint, overlapping, l_same, l_same - elif setting == "IV": Fs, Find, lFs, lFind = disjoint, overlapping, l_diff, l_same - else: raise ValueError("Invalid setting") - return Fs, Find, lFs, lFind - -# ---------------------------- -# Main experiment (Table 5 / Fig 3) -# ---------------------------- -def run_table5_full(dataset_name, setting="I", inductive=False, device="cpu"): - data_orig, dataset = load_dataset(dataset_name, device=device) - in_dim, out_dim = dataset.num_features, 
dataset.num_classes - Fs, Find, lFs, lFind = get_setting_architectures(setting) - - results = [] - for seed in SEEDS: - set_seed(seed) - splits = split_nodes(data_orig.num_nodes, seed=seed) - node_order = torch.where(splits["train"])[0] - - # baseline target - base_model = build_model("GCN", in_dim, out_dim, 2) - base_model = train_model(base_model, data_orig, splits["train"], - epochs=MODEL_TRAIN_EPOCHS, device=device) - base_acc = compute_accuracy(base_model, data_orig, splits["dtest"]) - - for mask_ratio in MASK_RATIOS: - data_masked, _ = mask_features_global(data_orig, mask_ratio, seed=seed) - - # train masked target - tgt = build_model("GCN", in_dim, out_dim, 2) - tgt = train_model(tgt, data_masked, splits["train"], epochs=MODEL_TRAIN_EPOCHS, device=device) - tgt_acc = compute_accuracy(tgt, data_masked, splits["dtest"]) - drop = base_acc - tgt_acc - print(f"[{dataset_name}-{setting}-seed{seed}] Mask={mask_ratio:.2f}, acc={tgt_acc:.2f}, drop={drop:.2f}") - - # Independents - indep_vecs, indep_accs = [], [] - for arch in Find: - for j in range(NUM_INDEP): - m = build_model(arch, in_dim, out_dim, lFind) - m = train_model(m, data_masked, splits["train"], epochs=MODEL_TRAIN_EPOCHS, device=device) - indep_accs.append(compute_accuracy(m, data_masked, splits["dtest"])) - indep_vecs.append(model_to_vector_probs(m, data_masked, node_order)) - - # Surrogates - with torch.no_grad(): - soft_all = F.softmax(tgt(data_masked.x, data_masked.edge_index), dim=1).cpu() - - surr_vecs, surr_accs, surr_fids = [], [], [] - for arch in Fs: - for j in range(NUM_SURR): - m = build_model(arch, in_dim, out_dim, lFs) - m = train_with_soft_labels(m, data_masked, splits["train"], soft_all, - epochs=SURR_TRAIN_EPOCHS, device=device) - surr_accs.append(compute_accuracy(m, data_masked, splits["dtest"])) - surr_fids.append(compute_fidelity(m, tgt, data_masked, splits["dtest"])) - surr_vecs.append(model_to_vector_probs(m, data_masked, node_order)) - - # Ownership classifier (full batch training for stability) - X = np.vstack(indep_vecs + surr_vecs) - y = np.array([0]*len(indep_vecs) + [1]*len(surr_vecs)) - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.3, stratify=y, random_state=seed - ) - cown = COwn(X.shape[1]).to(device) - opt = torch.optim.Adam(cown.parameters(), lr=0.001, weight_decay=1e-4) - X_train_t, y_train_t = torch.tensor(X_train,dtype=torch.float32,device=device), torch.tensor(y_train,dtype=torch.long,device=device) - X_test_t, y_test_t = torch.tensor(X_test,dtype=torch.float32,device=device), torch.tensor(y_test,dtype=torch.long,device=device) - - for epoch in range(COWN_TRAIN_EPOCHS): - cown.train() - out = cown(X_train_t) - loss = F.cross_entropy(out, y_train_t) - opt.zero_grad(); loss.backward(); opt.step() - - with torch.no_grad(): - preds = cown(X_test_t).argmax(dim=1).cpu().numpy() - c_acc = (preds == y_test).mean()*100 - print(f"[{dataset_name}-{setting}-seed{seed}] C_own acc={c_acc:.2f}") - - # save - results.append({ - "dataset": dataset_name, - "setting": setting, - "mode": "Inductive" if inductive else "Transductive", - "seed": seed, - "mask_ratio": mask_ratio, - "target_acc": tgt_acc, - "indep_acc_mean": np.mean(indep_accs), - "surr_acc_mean": np.mean(surr_accs), - "surr_fid_mean": np.mean(surr_fids), - "cown_acc": c_acc - }) - - return pd.DataFrame(results) - -# ---------------------------- -# Driver -# ---------------------------- -if __name__ == "__main__": - os.makedirs("results", exist_ok=True) - datasets, settings = ["Cora","CiteSeer","PubMed","Amazon","dblp"], 
["I","II","III","IV"] - device = "cuda" if torch.cuda.is_available() else "cpu" - all_results = [] - - for ds in datasets: - for st in settings: - for mode in [False, True]: # transductive=False, inductive=True - df = run_table5_full(dataset_name=ds, setting=st, inductive=mode, device=device) - all_results.append(df) - - all_results = pd.concat(all_results, ignore_index=True) - all_results.to_csv("results/all_results_per_seed.csv", index=False) - - # --- Aggregation for analyze_tables_extended.py --- - agg = all_results.groupby(["dataset","setting","mode"]).agg({ - "target_acc": ["mean","std"], - "indep_acc_mean": ["mean","std"], - "surr_acc_mean": ["mean","std"], - "surr_fid_mean": ["mean","std"], - "cown_acc": ["mean","std"] - }).reset_index() - - agg.columns = [ - "dataset","setting","mode", - "target_acc_mean","target_acc_std", - "indep_acc_mean","indep_acc_std", - "surr_acc_mean","surr_acc_std", - "surr_fid_mean","surr_fid_std", - "cown_acc_mean","cown_acc_std" - ] - agg.to_csv("results/table5_all_results.csv", index=False) - - print("✅ Saved results/all_results_per_seed.csv and results/table5_all_results.csv (aggregated)") From f51e81cf075dcfaf4e44bf3f235a1b0c40a8fdc1 Mon Sep 17 00:00:00 2001 From: Iqra171 Date: Fri, 17 Oct 2025 00:51:23 +0500 Subject: [PATCH 16/22] Delete examples/run_bgrove.py --- examples/run_bgrove.py | 208 ----------------------------------------- 1 file changed, 208 deletions(-) delete mode 100644 examples/run_bgrove.py diff --git a/examples/run_bgrove.py b/examples/run_bgrove.py deleted file mode 100644 index e8e2c86..0000000 --- a/examples/run_bgrove.py +++ /dev/null @@ -1,208 +0,0 @@ -""" -examples/run_bgrove.py - -Integration of BGrOVe experiment (Table 4 reproduction) using PyGIP datasets and models. -- Preserves original evaluation: FPR, FNR, ACC -- Uses same dataset/model structure as main framework -""" - -import os, sys -import random -import numpy as np -import pandas as pd -import torch -import dgl -import torch.nn.functional as F -from sklearn.metrics.pairwise import cosine_similarity -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -# integrate with PyGIP -from pygip.datasets.pyg_datasets import Cora, CiteSeer, PubMed, DBLP, Amazon -from pygip.models.nn.pyg_backbones import GCN, GAT, GraphSAGE, GIN, SGC - - -# ---------------------------- -# Helpers -# ---------------------------- -def set_seed(seed=0): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - if torch.cuda.is_available(): - torch.cuda.manual_seed_all(seed) - - -def train_model(model, data, train_mask, epochs=50, lr=0.01, device="cpu"): - model = model.to(device) - data = data.to(device) - optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4) - for epoch in range(epochs): - model.train() - optimizer.zero_grad() - out = model(data.x, data.edge_index) - loss = F.cross_entropy(out[train_mask], data.y[train_mask]) - loss.backward() - optimizer.step() - return model - - -def get_posteriors(model, data, nodes): - model.eval() - with torch.no_grad(): - logits = model(data.x, data.edge_index)[nodes] - probs = F.softmax(logits, dim=1).cpu().numpy() - return probs - - -def compute_metrics(true_labels, pred_labels): - true_labels = np.array(true_labels) - pred_labels = np.array(pred_labels) - FP = np.sum((pred_labels == 1) & (true_labels == 0)) - FN = np.sum((pred_labels == 0) & (true_labels == 1)) - TN = np.sum((pred_labels == 0) & (true_labels == 0)) - TP = np.sum((pred_labels == 1) & (true_labels == 1)) - FPR = FP / (FP + 
TN + 1e-8) * 100 - FNR = FN / (FN + TP + 1e-8) * 100 - ACC = (TP + TN) / (TP + TN + FP + FN + 1e-8) * 100 - return FPR, FNR, ACC - - -# ---------------------------- -# Model Builder -# ---------------------------- -def build_model(model_type, in_dim, out_dim, layers=2): - if model_type == "GCN": - return GCN(in_dim, 16, out_dim) - elif model_type == "GraphSAGE": - return GraphSAGE(in_dim, 16, out_dim) - elif model_type == "GAT": - return GAT(in_dim, 16, out_dim) - elif model_type == "GIN": - return GIN(in_dim, 16, out_dim) - elif model_type == "SGC": - return SGC(in_dim, out_dim) - else: - raise ValueError(f"Unknown model type: {model_type}") - -# ---------------------------- -# Threshold tuning -# ---------------------------- -def tune_threshold(Fs_star, Fs, Find, data, query_nodes): - scores, labels = [], [] - for star in Fs_star: - probs_star = get_posteriors(star, data, query_nodes) - for surrogate in Fs: - sim = cosine_similarity(probs_star, get_posteriors(surrogate, data, query_nodes)).mean() - scores.append(sim) - labels.append(1) - for ind in Find: - sim = cosine_similarity(probs_star, get_posteriors(ind, data, query_nodes)).mean() - scores.append(sim) - labels.append(0) - best_thr, best_acc = 0.5, 0 - for thr in np.linspace(0.1, 0.99, 50): - preds = [1 if s > thr else 0 for s in scores] - _, _, acc = compute_metrics(labels, preds) - if acc > best_acc: - best_acc, best_thr = acc, thr - return best_thr - - -# ---------------------------- -# Main Experiment -# ---------------------------- -def run_bgrove_experiment(dataset_cls, condition="CondA ✓", setting="I", device="cpu"): - ds = dataset_cls(path="./data") - data = ds.graph_data.to(device) - in_dim, out_dim = ds.num_features, ds.num_classes - train_mask = data.train_mask - - overlapping = ["GCN", "GAT", "GraphSAGE"] - disjoint = ["GIN", "SGC"] - layers_same, layers_diff = 2, 3 - - if setting == "I": - arch_Fs, arch_Find = overlapping, overlapping - nFs, nFind = layers_same, layers_same - elif setting == "II": - arch_Fs, arch_Find = overlapping, overlapping - nFs, nFind = layers_diff, layers_same - elif setting == "III": - arch_Fs, arch_Find = disjoint, overlapping - nFs, nFind = layers_same, layers_same - elif setting == "IV": - arch_Fs, arch_Find = disjoint, overlapping - nFs, nFind = layers_diff, layers_same - else: - raise ValueError("Invalid setting") - - target = train_model(build_model("GCN", in_dim, out_dim, 2), data, train_mask, device=device) - - Fs = [train_model(build_model(a, in_dim, out_dim, nFs), data, train_mask, device=device) - for a in arch_Fs] - set_seed(123 if condition != "CondA ✓" else 0) - Fs_star = [train_model(build_model(a, in_dim, out_dim, nFs), data, train_mask, device=device) - for a in arch_Fs] - Find = [train_model(build_model(a, in_dim, out_dim, nFind), data, train_mask, device=device) - for a in arch_Find] - - num_queries = max(1, int(0.1 * data.num_nodes)) - query_nodes = torch.randperm(data.num_nodes)[:num_queries] - thr = tune_threshold(Fs_star, Fs, Find, data, query_nodes) - - true_labels, pred_labels = [], [] - for model in Fs + Find: - for star in Fs_star: - sim = cosine_similarity( - get_posteriors(model, data, query_nodes), - get_posteriors(star, data, query_nodes) - ).mean() - true_labels.append(1 if model in Fs else 0) - pred_labels.append(1 if sim > thr else 0) - return compute_metrics(true_labels, pred_labels) - - -# ---------------------------- -# Multi-seed Runner -# ---------------------------- -def run_multi(dataset_cls, condition, setting, device="cpu", seeds=[0, 1, 2, 3, 4]): - 
all_fpr, all_fnr, all_acc = [], [], [] - for seed in seeds: - set_seed(seed) - FPR, FNR, ACC = run_bgrove_experiment(dataset_cls, condition, setting, device) - all_fpr.append(FPR) - all_fnr.append(FNR) - all_acc.append(ACC) - fmt = lambda arr: f"{np.mean(arr):.2f} ± {np.std(arr):.2f}" - return fmt(all_fpr), fmt(all_fnr), fmt(all_acc) - - -# ---------------------------- -# Main Entry -# ---------------------------- -if __name__ == "__main__": - datasets = [Cora, CiteSeer, PubMed, DBLP, Amazon] - conditions = ["CondA ✓", "CondA ✗"] - settings = ["I", "II", "III", "IV"] - - total = len(datasets) * len(conditions) * len(settings) - results = {} - count = 0 - - for DatasetClass in datasets: - for cond in conditions: - for setting in settings: - count += 1 - print(f"\n=== [{count}/{total}] {DatasetClass.__name__}, {cond}, Setting {setting} ===") - FPR, FNR, ACC = run_multi(DatasetClass, cond, setting) - results[(DatasetClass.__name__, cond, setting)] = [FPR, FNR, ACC] - - df = pd.DataFrame.from_dict(results, orient="index", columns=["FPR (%)", "FNR (%)", "ACC (%)"]) - df.index = pd.MultiIndex.from_tuples(df.index, names=["Dataset", "Condition", "Setting"]) - - print("\n=== Table 4: BGrOVe Results (mean ± std) ===") - print(df) - os.makedirs("results", exist_ok=True) - path = "results/BGrOVe_table4.csv" - df.to_csv(path) - print(f"\n✅ Results saved to {path}") From 2f3d510093fb3bedcdad25d4838ae6e632d7e8ab Mon Sep 17 00:00:00 2001 From: Iqra171 Date: Fri, 17 Oct 2025 00:51:44 +0500 Subject: [PATCH 17/22] Delete examples/run_bboxve.py --- examples/run_bboxve.py | 164 ----------------------------------------- 1 file changed, 164 deletions(-) delete mode 100644 examples/run_bboxve.py diff --git a/examples/run_bboxve.py b/examples/run_bboxve.py deleted file mode 100644 index 79f544a..0000000 --- a/examples/run_bboxve.py +++ /dev/null @@ -1,164 +0,0 @@ -""" -run_bboxve.py — Backdoor-based Ownership Verification (BBoxVe) in PyG. - -This script: -- Injects a backdoor watermark trigger into node features. -- Trains a target model and an extracted surrogate model. -- Evaluates clean and backdoor performance (TCA, TBA, ECA, EBA). -- Loops over datasets and models automatically. 
-- Saves all results to results/BboxVe_results.csv -""" - -import os, sys -import torch -import random -import numpy as np -import pandas as pd -import torch.nn.functional as F -from torch_geometric.datasets import Planetoid -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from pygip.models.nn.pyg_backbones import GCN, GAT, GraphSAGE, GIN, SGC - -# from torch_geometric.nn import GINConv, SGConv -import torch.nn as nn - - - - - -# ---------------------------- -# Helpers -# ---------------------------- -def set_seed(seed=0): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - if torch.cuda.is_available(): - torch.cuda.manual_seed_all(seed) - - -def inject_backdoor(data, node_indices, num_features, fixed_val=10, trigger_size=35): - """Inject backdoor trigger on selected nodes.""" - poisoned_x = data.x.clone() - poisoned_y = data.y.clone() - least_class = torch.bincount(data.y).argmin() - - for idx in node_indices: - feat_ids = torch.randperm(num_features)[:trigger_size] - poisoned_x[idx, feat_ids] = fixed_val - poisoned_y[idx] = least_class - - return poisoned_x, poisoned_y - - -def train_model(model, data, train_idx, epochs=50, lr=0.01, device="cpu"): - model = model.to(device) - data = data.to(device) - opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4) - - for epoch in range(epochs): - model.train() - opt.zero_grad() - out = model(data.x, data.edge_index) - loss = F.cross_entropy(out[train_idx], data.y[train_idx]) - loss.backward() - opt.step() - - return model - - -def evaluate(model, data, clean_idx, backdoor_idx): - model.eval() - with torch.no_grad(): - logits = model(data.x, data.edge_index) - preds = logits.argmax(dim=1) - - clean_acc = (preds[clean_idx] == data.y[clean_idx]).float().mean().item() - backdoor_acc = (preds[backdoor_idx] == data.y[backdoor_idx]).float().mean().item() - - return clean_acc * 100, backdoor_acc * 100 - - -# ---------------------------- -# Main Experiment -# ---------------------------- -def run_experiment(dataset_name, model_type, with_backdoor=True, device="cpu"): - dataset = Planetoid(root=f"data/{dataset_name}", name=dataset_name) - data = dataset[0].to(device) - num_nodes = data.num_nodes - - idx = torch.randperm(num_nodes) - train_idx = idx[: int(0.2 * num_nodes)] - surr_idx = idx[int(0.2 * num_nodes): int(0.6 * num_nodes)] - test_idx = idx[int(0.6 * num_nodes):] - - bd_train_idx = train_idx[torch.randperm(len(train_idx))[: int(0.15 * len(train_idx))]] - bd_test_idx = test_idx[torch.randperm(len(test_idx))[: int(0.10 * len(test_idx))]] - - if with_backdoor: - data.x, data.y = inject_backdoor(data, bd_train_idx, dataset.num_features) - data.x, data.y = inject_backdoor(data, bd_test_idx, dataset.num_features) - - # Select model - if model_type == "GCN": - model_fn = lambda: GCN(dataset.num_features, 64, dataset.num_classes) - elif model_type == "GAT": - model_fn = lambda: GAT(dataset.num_features, 64, dataset.num_classes) - elif model_type == "GraphSAGE": - model_fn = lambda: GraphSAGE(dataset.num_features, 64, dataset.num_classes) - elif model_type == "GIN": - model_fn = lambda: GIN(dataset.num_features, 64, dataset.num_classes) - elif model_type == "SGC": - model_fn = lambda: SGC(dataset.num_features, dataset.num_classes) - else: - raise ValueError(f"Unknown model type: {model_type}") - - target = train_model(model_fn(), data, train_idx, device=device) - - surr_data = data if with_backdoor else dataset[0].clone() - surrogate = train_model(model_fn(), surr_data, surr_idx, 
device=device) - - clean_idx = torch.tensor(list(set(test_idx.tolist()) - set(bd_test_idx.tolist())), dtype=torch.long) - TCA, TBA = evaluate(target, data, clean_idx, bd_test_idx) - ECA, EBA = evaluate(surrogate, data, clean_idx, bd_test_idx) - - return { - "Dataset": dataset_name, - "Model": model_type, - "Setting": "With Backdoor" if with_backdoor else "Without Backdoor", - "TCA": TCA, - "ECA": ECA, - "TBA": TBA, - "EBA": EBA - } - - -# ---------------------------- -# Runner -# ---------------------------- -if __name__ == "__main__": - set_seed(0) - device = "cuda" if torch.cuda.is_available() else "cpu" - os.makedirs("results", exist_ok=True) - out_file = "results/BboxVe_results.csv" - - datasets = ["Cora", "CiteSeer", "PubMed"] - models = ["GCN", "GAT", "GraphSAGE", "GIN", "SGC"] - - all_results = [] - - for dataset in datasets: - for model_type in models: - print(f"\n=== Running {dataset} | {model_type} | With Backdoor ===") - res = run_experiment(dataset, model_type, with_backdoor=True, device=device) - all_results.append(res) - - df = pd.DataFrame(all_results) - if os.path.exists(out_file): - df.to_csv(out_file, mode="a", header=False, index=False) - else: - df.to_csv(out_file, index=False) - - print("\n=== All Table 3 Rows Added ===") - print(df) From f944268af37c96fe30a5dce41177f514800df5ba Mon Sep 17 00:00:00 2001 From: Iqra171 Date: Fri, 17 Oct 2025 00:52:00 +0500 Subject: [PATCH 18/22] Delete examples/adversial_table8.py --- examples/adversial_table8.py | 177 ----------------------------------- 1 file changed, 177 deletions(-) delete mode 100644 examples/adversial_table8.py diff --git a/examples/adversial_table8.py b/examples/adversial_table8.py deleted file mode 100644 index 5c3b144..0000000 --- a/examples/adversial_table8.py +++ /dev/null @@ -1,177 +0,0 @@ -# analyze_table8_double_extraction.py -# Reproduce Table 8 (Double Extraction Robustness) -# Matches Zhou et al. 
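For reference, the trigger planted by inject_backdoor in run_bboxve.py above boils down to the following, shown here on a toy graph (the sizes, trigger_size=3, and the empty edge_index are assumptions for the sketch): a random subset of feature positions on each poisoned node is pinned to a fixed value and the node is relabelled to the least-frequent class.

import torch
from torch_geometric.data import Data

x = torch.rand(6, 8)                               # 6 nodes, 8 features
y = torch.tensor([0, 0, 0, 1, 1, 2])               # class 2 is rarest
data = Data(x=x, edge_index=torch.empty((2, 0), dtype=torch.long), y=y)

poisoned_x, poisoned_y = data.x.clone(), data.y.clone()
least_class = torch.bincount(data.y).argmin()
for idx in (0, 3):                                  # nodes carrying the trigger
    feat_ids = torch.randperm(x.size(1))[:3]        # trigger_size = 3
    poisoned_x[idx, feat_ids] = 10.0                # fixed_val = 10
    poisoned_y[idx] = least_class
assert (poisoned_y[torch.tensor([0, 3])] == least_class).all()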
2024 format - -import os, sys, copy -import numpy as np, pandas as pd -import torch, torch.nn.functional as F -from torch_geometric.data import Data -from sklearn.decomposition import PCA - -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from run_table5 import ( - load_dataset, set_seed, build_model, - train_model, model_to_vector_probs, get_setting_architectures, COwn -) - -# ----------------------------- -# Config -# ----------------------------- -DEVICE = "cuda" if torch.cuda.is_available() else "cpu" -MODEL_TRAIN_EPOCHS = 80 -COWN_TRAIN_EPOCHS = 40 -EXTRACT_EPOCHS = 40 -SEEDS = [0, 1, 2] - -# ----------------------------- -# Double Extraction -# ----------------------------- -def extract_once(target_model, data, epochs=EXTRACT_EPOCHS, device="cpu"): - """Perform a single extraction attack using pseudo-labels from target_model.""" - target_model.eval() - with torch.no_grad(): - logits = target_model(data.x.to(device), data.edge_index.to(device)) - pseudo_labels = logits.argmax(dim=1).cpu() - - extracted = build_model("GCN", data.num_features, len(torch.unique(data.y)), 2) - mask = torch.ones(data.num_nodes, dtype=torch.bool) - extracted = train_model(extracted, data, mask, epochs=epochs, device=device) - return extracted - - -def double_extract_model(target_model, data, epochs=EXTRACT_EPOCHS, device="cpu"): - """Perform two rounds of extraction: F -> Ft -> Fs.""" - Ft = extract_once(target_model, data, epochs=epochs, device=device) - Fs = extract_once(Ft, data, epochs=epochs, device=device) - return Fs - - -# ----------------------------- -# Ownership verifier training -# ----------------------------- -def train_ownership_verifier(data, setting, device="cpu"): - in_dim, out_dim = data.num_features, len(torch.unique(data.y)) - Fs, Find, lFs, lFind = get_setting_architectures(setting) - owner_vecs, independent_vecs = [], [] - - # Owner models - for seed in SEEDS: - set_seed(seed) - mask = torch.randperm(data.num_nodes)[:int(0.6 * data.num_nodes)] - train_mask = torch.zeros(data.num_nodes, dtype=torch.bool) - train_mask[mask] = True - for arch in Fs: - m = build_model(arch, in_dim, out_dim, lFs) - m = train_model(m, data, train_mask, epochs=MODEL_TRAIN_EPOCHS, device=device) - owner_vecs.append(model_to_vector_probs(m, data, torch.arange(data.num_nodes))) - - # Independent models - for seed in SEEDS: - set_seed(seed + 100) - mask = torch.randperm(data.num_nodes)[:int(0.3 * data.num_nodes)] - ind_mask = torch.zeros(data.num_nodes, dtype=torch.bool) - ind_mask[mask] = True - for arch in Find: - m = build_model(arch, in_dim, out_dim, lFind) - m = train_model(m, data, ind_mask, epochs=MODEL_TRAIN_EPOCHS, device=device) - independent_vecs.append(model_to_vector_probs(m, data, torch.arange(data.num_nodes))) - - X_owner_np = np.vstack(owner_vecs) - X_ind_np = np.vstack(independent_vecs) - X_all = np.vstack([X_owner_np, X_ind_np]) - - n_samples, n_features = X_all.shape - n_comp = min(128, n_samples, n_features) - if n_comp < n_features: - pca = PCA(n_components=n_comp) - X_all = pca.fit_transform(X_all) - if X_all.shape[1] < 128: - padding = np.zeros((X_all.shape[0], 128 - X_all.shape[1])) - X_all = np.hstack([X_all, padding]) - - n_owner = len(owner_vecs) - X_owner_np = X_all[:n_owner] - X_ind_np = X_all[n_owner:] - - X_train = torch.tensor(X_all, dtype=torch.float32, device=device) - y_train = torch.tensor(np.hstack([np.ones(n_owner), np.zeros(len(X_ind_np))]), - dtype=torch.long, device=device) - cown = COwn(input_dim=128).to(device) - opt = 
torch.optim.Adam(cown.parameters(), lr=0.001) - - for epoch in range(COWN_TRAIN_EPOCHS): - cown.train() - opt.zero_grad() - logits = cown(X_train) - loss = F.cross_entropy(logits, y_train) - loss.backward() - opt.step() - - return cown, X_owner_np, X_ind_np - - -# ----------------------------- -# Eval metrics (FPR, FNR, ACC) -# ----------------------------- -def evaluate_cown(cown, X_owner_np, X_ind_np, device="cpu"): - X_owner = torch.tensor(X_owner_np, dtype=torch.float32, device=device) - X_ind = torch.tensor(X_ind_np, dtype=torch.float32, device=device) - cown.eval() - with torch.no_grad(): - preds_owner = cown(X_owner).argmax(dim=1).cpu().numpy() - preds_ind = cown(X_ind).argmax(dim=1).cpu().numpy() - fnr = (preds_owner == 0).mean() * 100 - fpr = (preds_ind == 1).mean() * 100 - acc = ((preds_owner == 1).sum() + (preds_ind == 0).sum()) / (len(preds_owner) + len(preds_ind)) * 100 - return fpr, fnr, acc - - -# ----------------------------- -# Generate Table 8 -# ----------------------------- -def generate_table8(all_results_csv="results/table5_all_results.csv"): - df = pd.read_csv(all_results_csv) - if "cown_acc_mean" not in df.columns: - raise KeyError("Expected 'cown_acc_mean' in all_results.csv") - - os.makedirs("results", exist_ok=True) - table8 = [] - - for (ds, st, md), sub in df.groupby(["dataset", "setting", "mode"]): - print(f"\n=== {ds} / Setting {st} / Mode {md} ===") - data, _ = load_dataset(ds, device=DEVICE) - num_nodes = data.num_nodes - train_nodes = torch.randperm(num_nodes)[:int(0.6 * num_nodes)] - train_mask = torch.zeros(num_nodes, dtype=torch.bool) - train_mask[train_nodes] = True - - # Train base target - Fs, Find, lFs, lFind = get_setting_architectures(st) - target_arch = Fs[0] if len(Fs) > 0 else "GCN" - m = build_model(target_arch, data.num_features, len(torch.unique(data.y)), lFs) - m = train_model(m, data, train_mask, epochs=MODEL_TRAIN_EPOCHS, device=DEVICE) - - ori_acc = (m(data.x.to(DEVICE), data.edge_index.to(DEVICE)).argmax(dim=1) == data.y.to(DEVICE)).float().mean().item() * 100 - - # Perform double extraction - m_double = double_extract_model(m, data, epochs=EXTRACT_EPOCHS, device=DEVICE) - - # Train ownership verifier - trained_cown, X_owner_np, X_ind_np = train_ownership_verifier(data, st, device=DEVICE) - fpr, fnr, acc_cown = evaluate_cown(trained_cown, X_owner_np, X_ind_np, device=DEVICE) - - table8.append({ - "Dataset": ds, "Setting": st, "Mode": md, - "Ori_ACC(%)": round(ori_acc, 2), - "FPR(%)": round(fpr, 2), - "FNR(%)": round(fnr, 2), - "Double_ACC(%)": round(acc_cown, 2) - }) - - pd.DataFrame(table8).to_csv("results/table8.csv", index=False) - print("\n✅ Saved results/table8.csv") - - -# ----------------------------- -if __name__ == "__main__": - generate_table8() From c0ea60ec1789ec8412bff0289f460f84fd46f95a Mon Sep 17 00:00:00 2001 From: Iqra171 Date: Fri, 17 Oct 2025 00:52:13 +0500 Subject: [PATCH 19/22] Delete examples/adversial.py --- examples/adversial.py | 240 ------------------------------------------ 1 file changed, 240 deletions(-) delete mode 100644 examples/adversial.py diff --git a/examples/adversial.py b/examples/adversial.py deleted file mode 100644 index 7099256..0000000 --- a/examples/adversial.py +++ /dev/null @@ -1,240 +0,0 @@ -# analyze_tables_extended.py -# Reproduce Table 6 (Fine-tuning robustness) and Table 7 (False positives) -# Matches Zhou et al. 
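One thing to note about extract_once in the script above: it computes pseudo_labels from the target's logits but then trains the extracted model on data.y with a full mask, so no knowledge is actually distilled from target_model (and m_double is never evaluated afterwards). A hedged sketch of what a pseudo-label-based extraction could look like, with the patch's build_model and train_model passed in as arguments so the snippet stays self-contained:

import torch

def extract_once_distilled(target_model, data, build_model, train_model,
                           epochs=40, device="cpu"):
    # Query the target and keep its argmax predictions as training labels.
    target_model.eval()
    with torch.no_grad():
        logits = target_model(data.x.to(device), data.edge_index.to(device))
    surrogate_data = data.clone()
    surrogate_data.y = logits.argmax(dim=1).cpu()
    extracted = build_model("GCN", data.num_features,
                            int(data.y.max()) + 1, 2)
    mask = torch.ones(data.num_nodes, dtype=torch.bool)
    return train_model(extracted, surrogate_data, mask,
                       epochs=epochs, device=device)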
2024 format - -import os, sys, copy -import numpy as np, pandas as pd -import torch, torch.nn.functional as F -from torch_geometric.data import Data -from torch_geometric.utils import subgraph -from sklearn.decomposition import PCA - -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from run_table5 import ( - load_dataset, set_seed, build_model, - train_model, model_to_vector_probs, get_setting_architectures, COwn -) - -# ----------------------------- -# Config -# ----------------------------- -DEVICE = "cuda" if torch.cuda.is_available() else "cpu" -MODEL_TRAIN_EPOCHS = 80 -COWN_TRAIN_EPOCHS = 40 -FINETUNE_EPOCHS = 20 -INDEPENDENT_MODEL_EPOCHS = 40 -SEEDS = [0, 1, 2] - -# ----------------------------- -# Fine-tuning (FGSM-like) -# ----------------------------- -def finetune_model(model, data, train_mask, epochs=20, lr=0.005, device="cpu"): - model_ft = copy.deepcopy(model).to(device) - - data_adv = Data( - x=data.x.clone().detach().to(device), - edge_index=data.edge_index.clone().to(device), - y=data.y.clone().to(device) - ) - data_adv.x.requires_grad = True - opt = torch.optim.Adam(model_ft.parameters(), lr=lr, weight_decay=5e-4) - - for epoch in range(epochs): - model_ft.train() - opt.zero_grad() - out = model_ft(data_adv.x, data_adv.edge_index) - loss = F.cross_entropy(out[train_mask], data_adv.y[train_mask]) - loss.backward() - - with torch.no_grad(): - if data_adv.x.grad is not None: - epsilon = 0.02 * (epoch + 1) / epochs - grad_sign = data_adv.x.grad.sign() - data_adv.x.data = data_adv.x.data + epsilon * grad_sign - data_adv.x.grad.zero_() - - opt.step() - return model_ft - -# ----------------------------- -# Ownership verifier training -# ----------------------------- -def train_ownership_verifier(data, setting, device="cpu"): - in_dim, out_dim = data.num_features, len(torch.unique(data.y)) - Fs, Find, lFs, lFind = get_setting_architectures(setting) - - owner_vecs, independent_vecs = [], [] - - # Owner models - for seed in SEEDS: - set_seed(seed) - mask = torch.randperm(data.num_nodes)[:int(0.6 * data.num_nodes)] - train_mask = torch.zeros(data.num_nodes, dtype=torch.bool) - train_mask[mask] = True - for arch in Fs: - m = build_model(arch, in_dim, out_dim, lFs) - m = train_model(m, data, train_mask, epochs=MODEL_TRAIN_EPOCHS, device=device) - owner_vecs.append(model_to_vector_probs(m, data, torch.arange(data.num_nodes))) - - # Independent models - for seed in SEEDS: - set_seed(seed + 100) - mask = torch.randperm(data.num_nodes)[:int(0.3 * data.num_nodes)] - ind_mask = torch.zeros(data.num_nodes, dtype=torch.bool) - ind_mask[mask] = True - for arch in Find: - m = build_model(arch, in_dim, out_dim, lFind) - m = train_model(m, data, ind_mask, epochs=INDEPENDENT_MODEL_EPOCHS, device=device) - independent_vecs.append(model_to_vector_probs(m, data, torch.arange(data.num_nodes))) - - X_owner_np = np.vstack(owner_vecs) - X_ind_np = np.vstack(independent_vecs) - - # Reduce to 128-d - X_all = np.vstack([X_owner_np, X_ind_np]) - n_samples, n_features = X_all.shape - n_comp = min(128, n_samples, n_features) - if n_comp < n_features: - pca = PCA(n_components=n_comp) - X_all = pca.fit_transform(X_all) - if X_all.shape[1] < 128: - padding = np.zeros((X_all.shape[0], 128 - X_all.shape[1])) - X_all = np.hstack([X_all, padding]) - - n_owner = len(owner_vecs) - X_owner_np = X_all[:n_owner] - X_ind_np = X_all[n_owner:] - - # Train classifier - X_train = torch.tensor(X_all, dtype=torch.float32, device=device) - y_train = torch.tensor(np.hstack([np.ones(n_owner), 
np.zeros(len(X_ind_np))]), - dtype=torch.long, device=device) - cown = COwn(input_dim=128).to(device) - opt = torch.optim.Adam(cown.parameters(), lr=0.001) - - for epoch in range(COWN_TRAIN_EPOCHS): - cown.train() - opt.zero_grad() - logits = cown(X_train) - loss = F.cross_entropy(logits, y_train) - loss.backward() - opt.step() - - return cown, X_owner_np, X_ind_np - -# ----------------------------- -# Eval metrics (FPR, FNR, ACC) -# ----------------------------- -def evaluate_cown(cown, X_owner_np, X_ind_np, device="cpu"): - X_owner = torch.tensor(X_owner_np, dtype=torch.float32, device=device) - X_ind = torch.tensor(X_ind_np, dtype=torch.float32, device=device) - - cown.eval() - with torch.no_grad(): - preds_owner = cown(X_owner).argmax(dim=1).cpu().numpy() - preds_ind = cown(X_ind).argmax(dim=1).cpu().numpy() - - fnr = (preds_owner == 0).mean() * 100 - fpr = (preds_ind == 1).mean() * 100 - acc = ( (preds_owner == 1).sum() + (preds_ind == 0).sum() ) / (len(preds_owner)+len(preds_ind)) * 100 - return fpr, fnr, acc - -# ----------------------------- -# False positives (Table 7) -# ----------------------------- -def run_false_positive_experiment(data_orig, dataset_name, setting, cown, node_order, device="cpu", repeats=5): - in_dim, out_dim = data_orig.num_features, len(torch.unique(data_orig.y)) - Fs, Find, lFs, lFind = get_setting_architectures(setting) - - fpr_list = [] - for rep in range(repeats): - set_seed(rep + 500) - num_nodes = data_orig.num_nodes - independent_train = torch.randperm(num_nodes)[:int(0.3 * num_nodes)] - independent_mask = torch.zeros(num_nodes, dtype=torch.bool) - independent_mask[independent_train] = True - - independent_vecs = [] - for arch in Find: - m = build_model(arch, in_dim, out_dim, lFind) - m = train_model(m, data_orig, independent_mask, epochs=INDEPENDENT_MODEL_EPOCHS, device=device) - independent_vecs.append(model_to_vector_probs(m, data_orig, node_order)) - - X_independent_np = np.vstack(independent_vecs) - n_samples, n_features = X_independent_np.shape - n_comp = min(128, n_samples, n_features) - if n_comp < n_features: - pca = PCA(n_components=n_comp) - X_independent_np = pca.fit_transform(X_independent_np) - if X_independent_np.shape[1] < 128: - padding = np.zeros((X_independent_np.shape[0], 128 - X_independent_np.shape[1])) - X_independent_np = np.hstack([X_independent_np, padding]) - - X_independent = torch.tensor(X_independent_np, dtype=torch.float32, device=device) - cown.eval() - with torch.no_grad(): - preds = cown(X_independent).argmax(dim=1).cpu().numpy() - - fpr = (preds == 1).mean() * 100 - fpr_list.append(fpr) - - return np.mean(fpr_list), np.std(fpr_list) - -# ----------------------------- -# Generate Table 6 and Table 7 -# ----------------------------- -def generate_tables(all_results_csv="results/table5_all_results.csv"): - df = pd.read_csv(all_results_csv) - if "cown_acc_mean" not in df.columns: - raise KeyError("Expected 'cown_acc_mean' in all_results.csv") - - os.makedirs("results", exist_ok=True) - table6, table7 = [], [] - - for (ds, st, md), sub in df.groupby(["dataset", "setting", "mode"]): - print(f"\n=== {ds} / Setting {st} / Mode {md} ===") - - data, _ = load_dataset(ds, device=DEVICE) - num_nodes = data.num_nodes - train_nodes = torch.randperm(num_nodes)[:int(0.6 * num_nodes)] - train_mask = torch.zeros(num_nodes, dtype=torch.bool) - train_mask[train_nodes] = True - - # Train + fine-tune - Fs, Find, lFs, lFind = get_setting_architectures(st) - target_arch = Fs[0] if len(Fs) > 0 else "GCN" - m = build_model(target_arch, 
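A tiny numeric check (with invented predictions) of the rates evaluate_cown reports: FNR is the share of owner models predicted 0, FPR the share of independents predicted 1, and ACC the overall hit rate.

import numpy as np

preds_owner = np.array([1, 1, 0, 1])    # one owner model missed
preds_ind = np.array([0, 0, 1])         # one independent flagged
fnr = (preds_owner == 0).mean() * 100                          # 25.0
fpr = (preds_ind == 1).mean() * 100                            # ~33.3
acc = ((preds_owner == 1).sum() + (preds_ind == 0).sum()) \
      / (len(preds_owner) + len(preds_ind)) * 100              # ~71.4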
data.num_features, len(torch.unique(data.y)), lFs) - m = train_model(m, data, train_mask, epochs=MODEL_TRAIN_EPOCHS, device=DEVICE) - m_finetuned = finetune_model(m, data, train_mask, epochs=FINETUNE_EPOCHS, device=DEVICE) - - ori_acc = (m(data.x.to(DEVICE), data.edge_index.to(DEVICE)).argmax(dim=1) == data.y.to(DEVICE)).float().mean().item() * 100 - - # Train C_own - trained_cown, X_owner_np, X_ind_np = train_ownership_verifier(data, st, device=DEVICE) - fpr, fnr, acc_cown = evaluate_cown(trained_cown, X_owner_np, X_ind_np, device=DEVICE) - - # Table 6 - table6.append({ - "Dataset": ds, "Setting": st, "Mode": md, - "Ori_ACC(%)": round(ori_acc, 2), - "FPR(%)": round(fpr, 2), - "FNR(%)": round(fnr, 2), - "Fine_ACC(%)": round(acc_cown, 2) - }) - - # Table 7 - node_order = torch.arange(data.num_nodes) - fpr_mean, fpr_std = run_false_positive_experiment(data, ds, st, trained_cown, node_order, device=DEVICE) - table7.append({ - "Dataset": ds, "Setting": st, "Mode": md, - "FPR": f"{fpr_mean:.2f} ± {fpr_std:.2f}" - }) - - pd.DataFrame(table6).to_csv("results/table6.csv", index=False) - pd.DataFrame(table7).to_csv("results/table7.csv", index=False) - print("\n✅ Saved results/table6.csv and table7.csv") - - -# ----------------------------- -if __name__ == "__main__": - generate_tables() From 8b3acec08772479b920133d0b09bd20f24a447ab Mon Sep 17 00:00:00 2001 From: Iqra171 Date: Fri, 17 Oct 2025 00:53:07 +0500 Subject: [PATCH 20/22] Add files via upload --- examples/run_table3.py | 19 +++++++++++++++++++ examples/run_table4.py | 19 +++++++++++++++++++ examples/run_table5.py | 18 ++++++++++++++++++ examples/run_table6_7.py | 15 +++++++++++++++ examples/run_table8.py | 14 ++++++++++++++ 5 files changed, 85 insertions(+) create mode 100644 examples/run_table3.py create mode 100644 examples/run_table4.py create mode 100644 examples/run_table5.py create mode 100644 examples/run_table6_7.py create mode 100644 examples/run_table8.py diff --git a/examples/run_table3.py b/examples/run_table3.py new file mode 100644 index 0000000..8f5cffb --- /dev/null +++ b/examples/run_table3.py @@ -0,0 +1,19 @@ +""" +Example Script: run_example_bboxve.py +-------------------------------------- +Demonstrates how to run the BBoxVe (Backdoor-based Ownership Verification) +experiment from Table 3 using PyGIP. +""" + +import torch +from implementation.run_bboxve import run_experiment + +def main(): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print(f"Using device: {device}") + res = run_experiment("Cora", "GCN", with_backdoor=True, device=device) + print("\n=== Single-run Result (Table 3 Example) ===") + print(res) + +if __name__ == "__main__": + main() diff --git a/examples/run_table4.py b/examples/run_table4.py new file mode 100644 index 0000000..b9ca170 --- /dev/null +++ b/examples/run_table4.py @@ -0,0 +1,19 @@ +""" +Example Script: run_example_bgrove.py +-------------------------------------- +Demonstrates how to reproduce one configuration of the BGrOVe +experiment (Table 4). 
+""" + +import torch +from implementation.run_bgrove import run_bgrove_experiment +from pygip.datasets.pyg_datasets import Cora + +def main(): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + res = run_bgrove_experiment(Cora, condition="CondA ✓", setting="I", device=device) + print("\n=== Single-run Result (Table 4 Example) ===") + print("FPR, FNR, ACC =", res) + +if __name__ == "__main__": + main() diff --git a/examples/run_table5.py b/examples/run_table5.py new file mode 100644 index 0000000..4712fac --- /dev/null +++ b/examples/run_table5.py @@ -0,0 +1,18 @@ +""" +Example Script: run_example_table5.py +-------------------------------------- +Demonstrates how to run the main Table 5 experiment (and Figure 3) +using the unified training pipeline. +""" + +import torch +from implementation.run_table5_full import run_table5_full + +def main(): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + df = run_table5_full(dataset_name="Cora", setting="I", device=device) + print("\n=== Single-run Result (Table 5 Example) ===") + print(df.head()) + +if __name__ == "__main__": + main() diff --git a/examples/run_table6_7.py b/examples/run_table6_7.py new file mode 100644 index 0000000..48bb727 --- /dev/null +++ b/examples/run_table6_7.py @@ -0,0 +1,15 @@ +""" +Example Script: run_example_analyze_extended.py +-------------------------------------- +Runs the analysis that produces Table 6 (fine-tuning robustness) +and Table 7 (false positives). +""" + +from implementation.adversial import generate_tables + +def main(): + print("Running analysis for Tables 6 & 7 ...") + generate_tables("results/table5_all_results.csv") + +if __name__ == "__main__": + main() diff --git a/examples/run_table8.py b/examples/run_table8.py new file mode 100644 index 0000000..907cd37 --- /dev/null +++ b/examples/run_table8.py @@ -0,0 +1,14 @@ +""" +Example Script: run_example_double_extraction.py +-------------------------------------- +Demonstrates how to reproduce Table 8 (Double Extraction Robustness). +""" + +from implementation.adversial_table8 import generate_table8 + +def main(): + print("Running Double Extraction analysis (Table 8) ...") + generate_table8("results/table5_all_results.csv") + +if __name__ == "__main__": + main() From f8b813c363c00677ce8c6844a6d92f7fbd8c81b6 Mon Sep 17 00:00:00 2001 From: Iqra171 Date: Fri, 17 Oct 2025 00:55:00 +0500 Subject: [PATCH 21/22] Implement double extraction robustness and generate Table 8 This script implements the double extraction robustness method and generates Table 8 based on the results from previous experiments. It includes model training, evaluation metrics, and data handling to reproduce results as per Zhou et al. 2024. --- implementation/adversial_table8 | 176 ++++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 implementation/adversial_table8 diff --git a/implementation/adversial_table8 b/implementation/adversial_table8 new file mode 100644 index 0000000..f8e7fa6 --- /dev/null +++ b/implementation/adversial_table8 @@ -0,0 +1,176 @@ +# Reproduce Table 8 (Double Extraction Robustness) +# Matches Zhou et al. 
2024 format + +import os, sys, copy +import numpy as np, pandas as pd +import torch, torch.nn.functional as F +from torch_geometric.data import Data +from sklearn.decomposition import PCA + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from run_table5_full import ( + load_dataset, set_seed, build_model, + train_model, model_to_vector_probs, get_setting_architectures, COwn +) + +# ----------------------------- +# Config +# ----------------------------- +DEVICE = "cuda" if torch.cuda.is_available() else "cpu" +MODEL_TRAIN_EPOCHS = 80 +COWN_TRAIN_EPOCHS = 40 +EXTRACT_EPOCHS = 40 +SEEDS = [0, 1, 2] + +# ----------------------------- +# Double Extraction +# ----------------------------- +def extract_once(target_model, data, epochs=EXTRACT_EPOCHS, device="cpu"): + """Perform a single extraction attack using pseudo-labels from target_model.""" + target_model.eval() + with torch.no_grad(): + logits = target_model(data.x.to(device), data.edge_index.to(device)) + pseudo_labels = logits.argmax(dim=1).cpu() + + extracted = build_model("GCN", data.num_features, len(torch.unique(data.y)), 2) + mask = torch.ones(data.num_nodes, dtype=torch.bool) + extracted = train_model(extracted, data, mask, epochs=epochs, device=device) + return extracted + + +def double_extract_model(target_model, data, epochs=EXTRACT_EPOCHS, device="cpu"): + """Perform two rounds of extraction: F -> Ft -> Fs.""" + Ft = extract_once(target_model, data, epochs=epochs, device=device) + Fs = extract_once(Ft, data, epochs=epochs, device=device) + return Fs + + +# ----------------------------- +# Ownership verifier training +# ----------------------------- +def train_ownership_verifier(data, setting, device="cpu"): + in_dim, out_dim = data.num_features, len(torch.unique(data.y)) + Fs, Find, lFs, lFind = get_setting_architectures(setting) + owner_vecs, independent_vecs = [], [] + + # Owner models + for seed in SEEDS: + set_seed(seed) + mask = torch.randperm(data.num_nodes)[:int(0.6 * data.num_nodes)] + train_mask = torch.zeros(data.num_nodes, dtype=torch.bool) + train_mask[mask] = True + for arch in Fs: + m = build_model(arch, in_dim, out_dim, lFs) + m = train_model(m, data, train_mask, epochs=MODEL_TRAIN_EPOCHS, device=device) + owner_vecs.append(model_to_vector_probs(m, data, torch.arange(data.num_nodes))) + + # Independent models + for seed in SEEDS: + set_seed(seed + 100) + mask = torch.randperm(data.num_nodes)[:int(0.3 * data.num_nodes)] + ind_mask = torch.zeros(data.num_nodes, dtype=torch.bool) + ind_mask[mask] = True + for arch in Find: + m = build_model(arch, in_dim, out_dim, lFind) + m = train_model(m, data, ind_mask, epochs=MODEL_TRAIN_EPOCHS, device=device) + independent_vecs.append(model_to_vector_probs(m, data, torch.arange(data.num_nodes))) + + X_owner_np = np.vstack(owner_vecs) + X_ind_np = np.vstack(independent_vecs) + X_all = np.vstack([X_owner_np, X_ind_np]) + + n_samples, n_features = X_all.shape + n_comp = min(128, n_samples, n_features) + if n_comp < n_features: + pca = PCA(n_components=n_comp) + X_all = pca.fit_transform(X_all) + if X_all.shape[1] < 128: + padding = np.zeros((X_all.shape[0], 128 - X_all.shape[1])) + X_all = np.hstack([X_all, padding]) + + n_owner = len(owner_vecs) + X_owner_np = X_all[:n_owner] + X_ind_np = X_all[n_owner:] + + X_train = torch.tensor(X_all, dtype=torch.float32, device=device) + y_train = torch.tensor(np.hstack([np.ones(n_owner), np.zeros(len(X_ind_np))]), + dtype=torch.long, device=device) + cown = COwn(input_dim=128).to(device) + opt = 
torch.optim.Adam(cown.parameters(), lr=0.001) + + for epoch in range(COWN_TRAIN_EPOCHS): + cown.train() + opt.zero_grad() + logits = cown(X_train) + loss = F.cross_entropy(logits, y_train) + loss.backward() + opt.step() + + return cown, X_owner_np, X_ind_np + + +# ----------------------------- +# Eval metrics (FPR, FNR, ACC) +# ----------------------------- +def evaluate_cown(cown, X_owner_np, X_ind_np, device="cpu"): + X_owner = torch.tensor(X_owner_np, dtype=torch.float32, device=device) + X_ind = torch.tensor(X_ind_np, dtype=torch.float32, device=device) + cown.eval() + with torch.no_grad(): + preds_owner = cown(X_owner).argmax(dim=1).cpu().numpy() + preds_ind = cown(X_ind).argmax(dim=1).cpu().numpy() + fnr = (preds_owner == 0).mean() * 100 + fpr = (preds_ind == 1).mean() * 100 + acc = ((preds_owner == 1).sum() + (preds_ind == 0).sum()) / (len(preds_owner) + len(preds_ind)) * 100 + return fpr, fnr, acc + + +# ----------------------------- +# Generate Table 8 +# ----------------------------- +def generate_table8(all_results_csv="results/table5_all_results.csv"): + df = pd.read_csv(all_results_csv) + if "cown_acc_mean" not in df.columns: + raise KeyError("Expected 'cown_acc_mean' in all_results.csv") + + os.makedirs("results", exist_ok=True) + table8 = [] + + for (ds, st, md), sub in df.groupby(["dataset", "setting", "mode"]): + print(f"\n=== {ds} / Setting {st} / Mode {md} ===") + data, _ = load_dataset(ds, device=DEVICE) + num_nodes = data.num_nodes + train_nodes = torch.randperm(num_nodes)[:int(0.6 * num_nodes)] + train_mask = torch.zeros(num_nodes, dtype=torch.bool) + train_mask[train_nodes] = True + + # Train base target + Fs, Find, lFs, lFind = get_setting_architectures(st) + target_arch = Fs[0] if len(Fs) > 0 else "GCN" + m = build_model(target_arch, data.num_features, len(torch.unique(data.y)), lFs) + m = train_model(m, data, train_mask, epochs=MODEL_TRAIN_EPOCHS, device=DEVICE) + + ori_acc = (m(data.x.to(DEVICE), data.edge_index.to(DEVICE)).argmax(dim=1) == data.y.to(DEVICE)).float().mean().item() * 100 + + # Perform double extraction + m_double = double_extract_model(m, data, epochs=EXTRACT_EPOCHS, device=DEVICE) + + # Train ownership verifier + trained_cown, X_owner_np, X_ind_np = train_ownership_verifier(data, st, device=DEVICE) + fpr, fnr, acc_cown = evaluate_cown(trained_cown, X_owner_np, X_ind_np, device=DEVICE) + + table8.append({ + "Dataset": ds, "Setting": st, "Mode": md, + "Ori_ACC(%)": round(ori_acc, 2), + "FPR(%)": round(fpr, 2), + "FNR(%)": round(fnr, 2), + "Double_ACC(%)": round(acc_cown, 2) + }) + + pd.DataFrame(table8).to_csv("results/table8.csv", index=False) + print("\n✅ Saved results/table8.csv") + + +# ----------------------------- +if __name__ == "__main__": + generate_table8() From 93011eb059bcfd9a7f88e3250735be3c666e18a2 Mon Sep 17 00:00:00 2001 From: Iqra171 Date: Fri, 17 Oct 2025 00:56:13 +0500 Subject: [PATCH 22/22] Add files via upload --- implementation/adversial.py | 240 +++++++++++++++++++++++++ implementation/run_bboxve.py | 164 +++++++++++++++++ implementation/run_bgrove.py | 236 ++++++++++++++++++++++++ implementation/run_table5_full.py | 288 ++++++++++++++++++++++++++++++ 4 files changed, 928 insertions(+) create mode 100644 implementation/adversial.py create mode 100644 implementation/run_bboxve.py create mode 100644 implementation/run_bgrove.py create mode 100644 implementation/run_table5_full.py diff --git a/implementation/adversial.py b/implementation/adversial.py new file mode 100644 index 0000000..7099256 --- /dev/null +++ 
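The 128-d embedding step inside train_ownership_verifier is worth isolating: PCA can return at most min(n_samples, n_features) components, so with only a handful of model vectors the projection comes out narrower than 128 and is zero-padded back up. A self-contained sketch with synthetic shapes (the 9 x 4096 matrix is an assumption):

import numpy as np
from sklearn.decomposition import PCA

X_all = np.random.rand(9, 4096)           # 9 holistic model vectors
n_comp = min(128, *X_all.shape)           # -> 9 components here
X_red = PCA(n_components=n_comp).fit_transform(X_all)
if X_red.shape[1] < 128:
    pad = np.zeros((X_red.shape[0], 128 - X_red.shape[1]))
    X_red = np.hstack([X_red, pad])
assert X_red.shape == (9, 128)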
b/implementation/adversial.py @@ -0,0 +1,240 @@ +# analyze_tables_extended.py +# Reproduce Table 6 (Fine-tuning robustness) and Table 7 (False positives) +# Matches Zhou et al. 2024 format + +import os, sys, copy +import numpy as np, pandas as pd +import torch, torch.nn.functional as F +from torch_geometric.data import Data +from torch_geometric.utils import subgraph +from sklearn.decomposition import PCA + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from run_table5 import ( + load_dataset, set_seed, build_model, + train_model, model_to_vector_probs, get_setting_architectures, COwn +) + +# ----------------------------- +# Config +# ----------------------------- +DEVICE = "cuda" if torch.cuda.is_available() else "cpu" +MODEL_TRAIN_EPOCHS = 80 +COWN_TRAIN_EPOCHS = 40 +FINETUNE_EPOCHS = 20 +INDEPENDENT_MODEL_EPOCHS = 40 +SEEDS = [0, 1, 2] + +# ----------------------------- +# Fine-tuning (FGSM-like) +# ----------------------------- +def finetune_model(model, data, train_mask, epochs=20, lr=0.005, device="cpu"): + model_ft = copy.deepcopy(model).to(device) + + data_adv = Data( + x=data.x.clone().detach().to(device), + edge_index=data.edge_index.clone().to(device), + y=data.y.clone().to(device) + ) + data_adv.x.requires_grad = True + opt = torch.optim.Adam(model_ft.parameters(), lr=lr, weight_decay=5e-4) + + for epoch in range(epochs): + model_ft.train() + opt.zero_grad() + out = model_ft(data_adv.x, data_adv.edge_index) + loss = F.cross_entropy(out[train_mask], data_adv.y[train_mask]) + loss.backward() + + with torch.no_grad(): + if data_adv.x.grad is not None: + epsilon = 0.02 * (epoch + 1) / epochs + grad_sign = data_adv.x.grad.sign() + data_adv.x.data = data_adv.x.data + epsilon * grad_sign + data_adv.x.grad.zero_() + + opt.step() + return model_ft + +# ----------------------------- +# Ownership verifier training +# ----------------------------- +def train_ownership_verifier(data, setting, device="cpu"): + in_dim, out_dim = data.num_features, len(torch.unique(data.y)) + Fs, Find, lFs, lFind = get_setting_architectures(setting) + + owner_vecs, independent_vecs = [], [] + + # Owner models + for seed in SEEDS: + set_seed(seed) + mask = torch.randperm(data.num_nodes)[:int(0.6 * data.num_nodes)] + train_mask = torch.zeros(data.num_nodes, dtype=torch.bool) + train_mask[mask] = True + for arch in Fs: + m = build_model(arch, in_dim, out_dim, lFs) + m = train_model(m, data, train_mask, epochs=MODEL_TRAIN_EPOCHS, device=device) + owner_vecs.append(model_to_vector_probs(m, data, torch.arange(data.num_nodes))) + + # Independent models + for seed in SEEDS: + set_seed(seed + 100) + mask = torch.randperm(data.num_nodes)[:int(0.3 * data.num_nodes)] + ind_mask = torch.zeros(data.num_nodes, dtype=torch.bool) + ind_mask[mask] = True + for arch in Find: + m = build_model(arch, in_dim, out_dim, lFind) + m = train_model(m, data, ind_mask, epochs=INDEPENDENT_MODEL_EPOCHS, device=device) + independent_vecs.append(model_to_vector_probs(m, data, torch.arange(data.num_nodes))) + + X_owner_np = np.vstack(owner_vecs) + X_ind_np = np.vstack(independent_vecs) + + # Reduce to 128-d + X_all = np.vstack([X_owner_np, X_ind_np]) + n_samples, n_features = X_all.shape + n_comp = min(128, n_samples, n_features) + if n_comp < n_features: + pca = PCA(n_components=n_comp) + X_all = pca.fit_transform(X_all) + if X_all.shape[1] < 128: + padding = np.zeros((X_all.shape[0], 128 - X_all.shape[1])) + X_all = np.hstack([X_all, padding]) + + n_owner = len(owner_vecs) + X_owner_np = X_all[:n_owner] + 
X_ind_np = X_all[n_owner:] + + # Train classifier + X_train = torch.tensor(X_all, dtype=torch.float32, device=device) + y_train = torch.tensor(np.hstack([np.ones(n_owner), np.zeros(len(X_ind_np))]), + dtype=torch.long, device=device) + cown = COwn(input_dim=128).to(device) + opt = torch.optim.Adam(cown.parameters(), lr=0.001) + + for epoch in range(COWN_TRAIN_EPOCHS): + cown.train() + opt.zero_grad() + logits = cown(X_train) + loss = F.cross_entropy(logits, y_train) + loss.backward() + opt.step() + + return cown, X_owner_np, X_ind_np + +# ----------------------------- +# Eval metrics (FPR, FNR, ACC) +# ----------------------------- +def evaluate_cown(cown, X_owner_np, X_ind_np, device="cpu"): + X_owner = torch.tensor(X_owner_np, dtype=torch.float32, device=device) + X_ind = torch.tensor(X_ind_np, dtype=torch.float32, device=device) + + cown.eval() + with torch.no_grad(): + preds_owner = cown(X_owner).argmax(dim=1).cpu().numpy() + preds_ind = cown(X_ind).argmax(dim=1).cpu().numpy() + + fnr = (preds_owner == 0).mean() * 100 + fpr = (preds_ind == 1).mean() * 100 + acc = ( (preds_owner == 1).sum() + (preds_ind == 0).sum() ) / (len(preds_owner)+len(preds_ind)) * 100 + return fpr, fnr, acc + +# ----------------------------- +# False positives (Table 7) +# ----------------------------- +def run_false_positive_experiment(data_orig, dataset_name, setting, cown, node_order, device="cpu", repeats=5): + in_dim, out_dim = data_orig.num_features, len(torch.unique(data_orig.y)) + Fs, Find, lFs, lFind = get_setting_architectures(setting) + + fpr_list = [] + for rep in range(repeats): + set_seed(rep + 500) + num_nodes = data_orig.num_nodes + independent_train = torch.randperm(num_nodes)[:int(0.3 * num_nodes)] + independent_mask = torch.zeros(num_nodes, dtype=torch.bool) + independent_mask[independent_train] = True + + independent_vecs = [] + for arch in Find: + m = build_model(arch, in_dim, out_dim, lFind) + m = train_model(m, data_orig, independent_mask, epochs=INDEPENDENT_MODEL_EPOCHS, device=device) + independent_vecs.append(model_to_vector_probs(m, data_orig, node_order)) + + X_independent_np = np.vstack(independent_vecs) + n_samples, n_features = X_independent_np.shape + n_comp = min(128, n_samples, n_features) + if n_comp < n_features: + pca = PCA(n_components=n_comp) + X_independent_np = pca.fit_transform(X_independent_np) + if X_independent_np.shape[1] < 128: + padding = np.zeros((X_independent_np.shape[0], 128 - X_independent_np.shape[1])) + X_independent_np = np.hstack([X_independent_np, padding]) + + X_independent = torch.tensor(X_independent_np, dtype=torch.float32, device=device) + cown.eval() + with torch.no_grad(): + preds = cown(X_independent).argmax(dim=1).cpu().numpy() + + fpr = (preds == 1).mean() * 100 + fpr_list.append(fpr) + + return np.mean(fpr_list), np.std(fpr_list) + +# ----------------------------- +# Generate Table 6 and Table 7 +# ----------------------------- +def generate_tables(all_results_csv="results/table5_all_results.csv"): + df = pd.read_csv(all_results_csv) + if "cown_acc_mean" not in df.columns: + raise KeyError("Expected 'cown_acc_mean' in all_results.csv") + + os.makedirs("results", exist_ok=True) + table6, table7 = [], [] + + for (ds, st, md), sub in df.groupby(["dataset", "setting", "mode"]): + print(f"\n=== {ds} / Setting {st} / Mode {md} ===") + + data, _ = load_dataset(ds, device=DEVICE) + num_nodes = data.num_nodes + train_nodes = torch.randperm(num_nodes)[:int(0.6 * num_nodes)] + train_mask = torch.zeros(num_nodes, dtype=torch.bool) + train_mask[train_nodes] 
= True + + # Train + fine-tune + Fs, Find, lFs, lFind = get_setting_architectures(st) + target_arch = Fs[0] if len(Fs) > 0 else "GCN" + m = build_model(target_arch, data.num_features, len(torch.unique(data.y)), lFs) + m = train_model(m, data, train_mask, epochs=MODEL_TRAIN_EPOCHS, device=DEVICE) + m_finetuned = finetune_model(m, data, train_mask, epochs=FINETUNE_EPOCHS, device=DEVICE) + + ori_acc = (m(data.x.to(DEVICE), data.edge_index.to(DEVICE)).argmax(dim=1) == data.y.to(DEVICE)).float().mean().item() * 100 + + # Train C_own + trained_cown, X_owner_np, X_ind_np = train_ownership_verifier(data, st, device=DEVICE) + fpr, fnr, acc_cown = evaluate_cown(trained_cown, X_owner_np, X_ind_np, device=DEVICE) + + # Table 6 + table6.append({ + "Dataset": ds, "Setting": st, "Mode": md, + "Ori_ACC(%)": round(ori_acc, 2), + "FPR(%)": round(fpr, 2), + "FNR(%)": round(fnr, 2), + "Fine_ACC(%)": round(acc_cown, 2) + }) + + # Table 7 + node_order = torch.arange(data.num_nodes) + fpr_mean, fpr_std = run_false_positive_experiment(data, ds, st, trained_cown, node_order, device=DEVICE) + table7.append({ + "Dataset": ds, "Setting": st, "Mode": md, + "FPR": f"{fpr_mean:.2f} ± {fpr_std:.2f}" + }) + + pd.DataFrame(table6).to_csv("results/table6.csv", index=False) + pd.DataFrame(table7).to_csv("results/table7.csv", index=False) + print("\n✅ Saved results/table6.csv and table7.csv") + + +# ----------------------------- +if __name__ == "__main__": + generate_tables() diff --git a/implementation/run_bboxve.py b/implementation/run_bboxve.py new file mode 100644 index 0000000..79f544a --- /dev/null +++ b/implementation/run_bboxve.py @@ -0,0 +1,164 @@ +""" +run_bboxve.py — Backdoor-based Ownership Verification (BBoxVe) in PyG. + +This script: +- Injects a backdoor watermark trigger into node features. +- Trains a target model and an extracted surrogate model. +- Evaluates clean and backdoor performance (TCA, TBA, ECA, EBA). +- Loops over datasets and models automatically. 
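Two notes on generate_tables above: m_finetuned is produced by finetune_model but Fine_ACC is read from the verifier on the owner/independent vectors, so the fine-tuned model itself never enters the reported number; and the FGSM-like step amounts to nudging the features by an epsilon-scaled sign of their gradient each epoch. The perturbation in isolation (toy tensors, epoch 4 of 20 assumed):

import torch

x = torch.rand(4, 3, requires_grad=True)
loss = (x ** 2).sum()                    # stand-in for the cross-entropy loss
loss.backward()
epochs, epoch = 20, 4
epsilon = 0.02 * (epoch + 1) / epochs    # ramps from 0.001 up to 0.02
with torch.no_grad():
    x_adv = x + epsilon * x.grad.sign()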
+- Saves all results to results/BboxVe_results.csv +""" + +import os, sys +import torch +import random +import numpy as np +import pandas as pd +import torch.nn.functional as F +from torch_geometric.datasets import Planetoid +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from pygip.models.nn.pyg_backbones import GCN, GAT, GraphSAGE, GIN, SGC + +# from torch_geometric.nn import GINConv, SGConv +import torch.nn as nn + + + + + +# ---------------------------- +# Helpers +# ---------------------------- +def set_seed(seed=0): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(seed) + + +def inject_backdoor(data, node_indices, num_features, fixed_val=10, trigger_size=35): + """Inject backdoor trigger on selected nodes.""" + poisoned_x = data.x.clone() + poisoned_y = data.y.clone() + least_class = torch.bincount(data.y).argmin() + + for idx in node_indices: + feat_ids = torch.randperm(num_features)[:trigger_size] + poisoned_x[idx, feat_ids] = fixed_val + poisoned_y[idx] = least_class + + return poisoned_x, poisoned_y + + +def train_model(model, data, train_idx, epochs=50, lr=0.01, device="cpu"): + model = model.to(device) + data = data.to(device) + opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4) + + for epoch in range(epochs): + model.train() + opt.zero_grad() + out = model(data.x, data.edge_index) + loss = F.cross_entropy(out[train_idx], data.y[train_idx]) + loss.backward() + opt.step() + + return model + + +def evaluate(model, data, clean_idx, backdoor_idx): + model.eval() + with torch.no_grad(): + logits = model(data.x, data.edge_index) + preds = logits.argmax(dim=1) + + clean_acc = (preds[clean_idx] == data.y[clean_idx]).float().mean().item() + backdoor_acc = (preds[backdoor_idx] == data.y[backdoor_idx]).float().mean().item() + + return clean_acc * 100, backdoor_acc * 100 + + +# ---------------------------- +# Main Experiment +# ---------------------------- +def run_experiment(dataset_name, model_type, with_backdoor=True, device="cpu"): + dataset = Planetoid(root=f"data/{dataset_name}", name=dataset_name) + data = dataset[0].to(device) + num_nodes = data.num_nodes + + idx = torch.randperm(num_nodes) + train_idx = idx[: int(0.2 * num_nodes)] + surr_idx = idx[int(0.2 * num_nodes): int(0.6 * num_nodes)] + test_idx = idx[int(0.6 * num_nodes):] + + bd_train_idx = train_idx[torch.randperm(len(train_idx))[: int(0.15 * len(train_idx))]] + bd_test_idx = test_idx[torch.randperm(len(test_idx))[: int(0.10 * len(test_idx))]] + + if with_backdoor: + data.x, data.y = inject_backdoor(data, bd_train_idx, dataset.num_features) + data.x, data.y = inject_backdoor(data, bd_test_idx, dataset.num_features) + + # Select model + if model_type == "GCN": + model_fn = lambda: GCN(dataset.num_features, 64, dataset.num_classes) + elif model_type == "GAT": + model_fn = lambda: GAT(dataset.num_features, 64, dataset.num_classes) + elif model_type == "GraphSAGE": + model_fn = lambda: GraphSAGE(dataset.num_features, 64, dataset.num_classes) + elif model_type == "GIN": + model_fn = lambda: GIN(dataset.num_features, 64, dataset.num_classes) + elif model_type == "SGC": + model_fn = lambda: SGC(dataset.num_features, dataset.num_classes) + else: + raise ValueError(f"Unknown model type: {model_type}") + + target = train_model(model_fn(), data, train_idx, device=device) + + surr_data = data if with_backdoor else dataset[0].clone() + surrogate = train_model(model_fn(), surr_data, surr_idx, 
device=device) + + clean_idx = torch.tensor(list(set(test_idx.tolist()) - set(bd_test_idx.tolist())), dtype=torch.long) + TCA, TBA = evaluate(target, data, clean_idx, bd_test_idx) + ECA, EBA = evaluate(surrogate, data, clean_idx, bd_test_idx) + + return { + "Dataset": dataset_name, + "Model": model_type, + "Setting": "With Backdoor" if with_backdoor else "Without Backdoor", + "TCA": TCA, + "ECA": ECA, + "TBA": TBA, + "EBA": EBA + } + + +# ---------------------------- +# Runner +# ---------------------------- +if __name__ == "__main__": + set_seed(0) + device = "cuda" if torch.cuda.is_available() else "cpu" + os.makedirs("results", exist_ok=True) + out_file = "results/BboxVe_results.csv" + + datasets = ["Cora", "CiteSeer", "PubMed"] + models = ["GCN", "GAT", "GraphSAGE", "GIN", "SGC"] + + all_results = [] + + for dataset in datasets: + for model_type in models: + print(f"\n=== Running {dataset} | {model_type} | With Backdoor ===") + res = run_experiment(dataset, model_type, with_backdoor=True, device=device) + all_results.append(res) + + df = pd.DataFrame(all_results) + if os.path.exists(out_file): + df.to_csv(out_file, mode="a", header=False, index=False) + else: + df.to_csv(out_file, index=False) + + print("\n=== All Table 3 Rows Added ===") + print(df) diff --git a/implementation/run_bgrove.py b/implementation/run_bgrove.py new file mode 100644 index 0000000..4789494 --- /dev/null +++ b/implementation/run_bgrove.py @@ -0,0 +1,236 @@ +""" +examples/run_bgrove.py + +Integration of BGrOVe experiment (Table 4 reproduction) using PyGIP datasets and models. +- Preserves original evaluation: FPR, FNR, ACC +- Uses same dataset/model structure and device conventions as the main framework +""" + +import os +import sys +import random +import numpy as np +import pandas as pd +import torch +import torch.nn.functional as F +from sklearn.metrics.pairwise import cosine_similarity + +# ensure project root is importable +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +# ------------------------------------------------ +# PyGIP integration (dataset + models) +# ------------------------------------------------ +try: + from pygip.datasets.pyg_datasets import Cora, CiteSeer, PubMed, DBLP, Amazon + from pygip.models.nn.pyg_backbones import GCN, GAT, GraphSAGE, GIN, SGC +except ImportError as e: + raise ImportError("Please ensure pygip is in PYTHONPATH before running this script.") from e + + +# ------------------------------------------------ +# Helpers +# ------------------------------------------------ +def get_device(): + """Return cuda if available, else cpu.""" + return torch.device("cuda" if torch.cuda.is_available() else "cpu") + +def set_seed(seed=0): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(seed) + +def train_model(model, data, train_mask, epochs=50, lr=0.01, device=None): + """Train a simple model for fixed epochs.""" + device = device or get_device() + model = model.to(device) + data = data.to(device) + optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4) + for epoch in range(epochs): + model.train() + optimizer.zero_grad() + out = model(data.x, data.edge_index) + loss = F.cross_entropy(out[train_mask], data.y[train_mask]) + loss.backward() + optimizer.step() + return model + +def get_posteriors(model, data, nodes, device=None): + """Get softmax posteriors for given node indices.""" + device = device or get_device() + model = model.to(device) + data = 
data.to(device) + model.eval() + with torch.no_grad(): + logits = model(data.x, data.edge_index)[nodes] + probs = F.softmax(logits, dim=1).cpu().numpy() + return probs + +def compute_metrics(true_labels, pred_labels): + """Compute FPR, FNR, ACC.""" + true_labels = np.array(true_labels) + pred_labels = np.array(pred_labels) + FP = np.sum((pred_labels == 1) & (true_labels == 0)) + FN = np.sum((pred_labels == 0) & (true_labels == 1)) + TN = np.sum((pred_labels == 0) & (true_labels == 0)) + TP = np.sum((pred_labels == 1) & (true_labels == 1)) + FPR = FP / (FP + TN + 1e-8) * 100 + FNR = FN / (FN + TP + 1e-8) * 100 + ACC = (TP + TN) / (TP + TN + FP + FN + 1e-8) * 100 + return FPR, FNR, ACC + + +# ------------------------------------------------ +# Model Builder +# ------------------------------------------------ +def build_model(model_type, in_dim, out_dim, layers=2): + """Return the desired model backbone.""" + if model_type == "GCN": + return GCN(in_dim, 16, out_dim) + elif model_type == "GraphSAGE": + return GraphSAGE(in_dim, 16, out_dim) + elif model_type == "GAT": + return GAT(in_dim, 16, out_dim) + elif model_type == "GIN": + return GIN(in_dim, 16, out_dim) + elif model_type == "SGC": + return SGC(in_dim, out_dim) + else: + raise ValueError(f"Unknown model type: {model_type}") + + +# ------------------------------------------------ +# Threshold tuning +# ------------------------------------------------ +def tune_threshold(Fs_star, Fs, Find, data, query_nodes, device=None): + """Find similarity threshold maximizing accuracy.""" + device = device or get_device() + scores, labels = [], [] + + for star in Fs_star: + probs_star = get_posteriors(star, data, query_nodes, device=device) + # genuine (1) + for surrogate in Fs: + sim = cosine_similarity(probs_star, get_posteriors(surrogate, data, query_nodes, device=device)).mean() + scores.append(sim) + labels.append(1) + # impostors (0) + for ind in Find: + sim = cosine_similarity(probs_star, get_posteriors(ind, data, query_nodes, device=device)).mean() + scores.append(sim) + labels.append(0) + + best_thr, best_acc = 0.5, 0 + for thr in np.linspace(0.1, 0.99, 50): + preds = [1 if s > thr else 0 for s in scores] + _, _, acc = compute_metrics(labels, preds) + if acc > best_acc: + best_acc, best_thr = acc, thr + return best_thr + + +# ------------------------------------------------ +# Single run (Table 4 cell) +# ------------------------------------------------ +def run_bgrove_experiment(dataset_cls, condition="CondA ✓", setting="I", device=None): + device = device or get_device() + ds = dataset_cls(path="./data") + data = ds.graph_data.to(device) + in_dim, out_dim = ds.num_features, ds.num_classes + train_mask = data.train_mask + + overlapping = ["GCN", "GAT", "GraphSAGE"] + disjoint = ["GIN", "SGC"] + layers_same, layers_diff = 2, 3 + + if setting == "I": + arch_Fs, arch_Find = overlapping, overlapping + nFs, nFind = layers_same, layers_same + elif setting == "II": + arch_Fs, arch_Find = overlapping, overlapping + nFs, nFind = layers_diff, layers_same + elif setting == "III": + arch_Fs, arch_Find = disjoint, overlapping + nFs, nFind = layers_same, layers_same + elif setting == "IV": + arch_Fs, arch_Find = disjoint, overlapping + nFs, nFind = layers_diff, layers_same + else: + raise ValueError("Invalid setting") + + target = train_model(build_model("GCN", in_dim, out_dim, 2), data, train_mask, device=device) + + Fs = [train_model(build_model(a, in_dim, out_dim, nFs), data, train_mask, device=device) + for a in arch_Fs] + set_seed(123 if condition != 
"CondA ✓" else 0) + Fs_star = [train_model(build_model(a, in_dim, out_dim, nFs), data, train_mask, device=device) + for a in arch_Fs] + Find = [train_model(build_model(a, in_dim, out_dim, nFind), data, train_mask, device=device) + for a in arch_Find] + + num_queries = max(1, int(0.1 * data.num_nodes)) + query_nodes = torch.randperm(data.num_nodes, device=device)[:num_queries] + thr = tune_threshold(Fs_star, Fs, Find, data, query_nodes, device=device) + + true_labels, pred_labels = [], [] + for model in Fs + Find: + for star in Fs_star: + sim = cosine_similarity( + get_posteriors(model, data, query_nodes, device=device), + get_posteriors(star, data, query_nodes, device=device) + ).mean() + true_labels.append(1 if model in Fs else 0) + pred_labels.append(1 if sim > thr else 0) + return compute_metrics(true_labels, pred_labels) + + +# ------------------------------------------------ +# Multi-seed aggregation +# ------------------------------------------------ +def run_multi(dataset_cls, condition, setting, device=None, seeds=(0, 1, 2, 3, 4)): + device = device or get_device() + all_fpr, all_fnr, all_acc = [], [], [] + for seed in seeds: + set_seed(seed) + FPR, FNR, ACC = run_bgrove_experiment(dataset_cls, condition, setting, device) + all_fpr.append(FPR) + all_fnr.append(FNR) + all_acc.append(ACC) + fmt = lambda arr: f"{np.mean(arr):.2f} ± {np.std(arr):.2f}" + return fmt(all_fpr), fmt(all_fnr), fmt(all_acc) + + +# ------------------------------------------------ +# Entry Point +# ------------------------------------------------ +if __name__ == "__main__": + device = get_device() + print(f"Using device: {device}") + + datasets = [Cora, CiteSeer, PubMed, DBLP, Amazon] + conditions = ["CondA ✓", "CondA ✗"] + settings = ["I", "II", "III", "IV"] + + total = len(datasets) * len(conditions) * len(settings) + results = {} + count = 0 + + for DatasetClass in datasets: + for cond in conditions: + for setting in settings: + count += 1 + print(f"\n=== [{count}/{total}] {DatasetClass.__name__}, {cond}, Setting {setting} ===") + FPR, FNR, ACC = run_multi(DatasetClass, cond, setting, device) + results[(DatasetClass.__name__, cond, setting)] = [FPR, FNR, ACC] + + df = pd.DataFrame.from_dict(results, orient="index", columns=["FPR (%)", "FNR (%)", "ACC (%)"]) + df.index = pd.MultiIndex.from_tuples(df.index, names=["Dataset", "Condition", "Setting"]) + + print("\n=== Table 4: BGrOVe Results (mean ± std) ===") + print(df) + os.makedirs("results", exist_ok=True) + out_path = "results/BGrOVe_table4.csv" + df.to_csv(out_path) + print(f"\n✅ Results saved to {out_path}") diff --git a/implementation/run_table5_full.py b/implementation/run_table5_full.py new file mode 100644 index 0000000..f251272 --- /dev/null +++ b/implementation/run_table5_full.py @@ -0,0 +1,288 @@ +# run_table5_full.py +# Rewritten to reproduce Figure 3 & Table 5 from Zhou et al. 
+
+import os, random, numpy as np, pandas as pd, sys
+import torch, torch.nn as nn, torch.nn.functional as F
+from torch_geometric.datasets import Planetoid, Amazon, CitationFull
+from torch_geometric.data import Data
+from sklearn.model_selection import train_test_split
+import matplotlib.pyplot as plt
+
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from pygip.models.nn.pyg_backbones import GCN, GAT, GraphSAGE, GIN, SGC
+
+# ----------------------------
+# Config
+# ----------------------------
+SEEDS = [0, 1]
+NUM_INDEP = 3   # independent models per architecture (reduced for runtime)
+NUM_SURR = 3    # surrogate models per architecture (reduced for runtime)
+MODEL_TRAIN_EPOCHS = 40
+SURR_TRAIN_EPOCHS = 40
+COWN_TRAIN_EPOCHS = 20
+MASK_RATIOS = [0.0, 0.1, 0.2, 0.4]   # 0.0 is the unmasked baseline
+
+
+# ----------------------------
+# Helpers
+# ----------------------------
+def set_seed(seed=0):
+    random.seed(seed); np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+def load_dataset(name, device="cpu"):
+    lname = name.lower()
+    if lname in ["pubmed", "cora", "citeseer"]:
+        dataset = Planetoid(root=f"data/{name}", name=name)
+        data = dataset[0].to(device)
+    elif "amazon" in lname:
+        sub = "Photo" if "photo" in lname else "Computers"
+        dataset = Amazon(root=f"data/{lname}", name=sub)
+        data = dataset[0].to(device)
+    elif lname in ["dblp", "db_lp", "db-lp"]:
+        dataset = CitationFull(root="data/dblp", name="dblp")
+        data = dataset[0].to(device)
+    else:
+        raise ValueError(f"Unknown dataset {name}")
+    return data, dataset
+
+def split_nodes(num_nodes, ratios=(0.3, 0.3, 0.3, 0.1), seed=0):
+    """Partition nodes into train/dshadow/dsurr/dtest; the last split absorbs rounding."""
+    rng = np.random.RandomState(seed)
+    perm = rng.permutation(num_nodes)
+    sizes = [int(r * num_nodes) for r in ratios]
+    sizes[-1] = num_nodes - sum(sizes[:-1])
+    splits, names, start = {}, ["train", "dshadow", "dsurr", "dtest"], 0
+    for name, sz in zip(names, sizes):
+        idx = perm[start:start + sz]
+        mask = torch.zeros(num_nodes, dtype=torch.bool); mask[idx] = True
+        splits[name] = mask; start += sz
+    return splits
+
+def filter_edges_to_mask(data, mask):
+    """Keep only edges whose endpoints both lie inside `mask`."""
+    ei = data.edge_index; mask = mask.to(ei.device)
+    keep = mask[ei[0]] & mask[ei[1]]
+    return ei[:, keep]
+
+def mask_features_global(data, mask_ratio=0.1, seed=0):
+    """Zero out the same random subset of feature columns for every node."""
+    x = data.x.clone(); num_feats = x.size(1)
+    k = int(mask_ratio * num_feats)   # k == 0 leaves the features untouched
+    rng = np.random.RandomState(seed)
+    feat_idx = rng.choice(num_feats, k, replace=False) if k > 0 else np.array([], dtype=int)
+    if k > 0:
+        x[:, feat_idx] = 0.0
+    data2 = Data(x=x, edge_index=data.edge_index.clone(), y=data.y.clone())
+    return data2, feat_idx
+
+# ----------------------------
+# Models & Training
+# ----------------------------
+def build_model(model_type, in_dim, out_dim, layers=2):
+    cls_map = {"GCN": GCN, "GraphSAGE": GraphSAGE, "GAT": GAT, "GIN": GIN, "SGC": SGC}
+    cls = cls_map[model_type]
+    try:
+        return cls(in_channels=in_dim, out_channels=out_dim, num_layers=layers)
+    except TypeError:
+        # fall back to positional constructors
+        return cls(in_dim, out_dim, layers)
+
+def train_model(model, data, train_mask, epochs=200, lr=0.01, device="cpu"):
+    model = model.to(device); data = data.to(device)
+    opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4)
+    for _ in range(epochs):
+        model.train(); opt.zero_grad()
+        out = model(data.x, data.edge_index)
+        loss = F.cross_entropy(out[train_mask], data.y[train_mask])
+        loss.backward(); opt.step()
+    return model
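+
+# Quick self-check (illustrative sketch, not called by the experiment): the
+# splits partition the node set exactly, and feature masking zeroes the same
+# columns for every node. `_sanity_check_masking` is a hypothetical helper
+# that uses only the utilities defined above.
+def _sanity_check_masking(num_nodes=10, num_feats=8):
+    splits = split_nodes(num_nodes, seed=0)
+    total = sum(int(m.sum()) for m in splits.values())
+    assert total == num_nodes, "splits must cover every node exactly once"
+    demo = Data(x=torch.ones(num_nodes, num_feats),
+                edge_index=torch.zeros((2, 0), dtype=torch.long),
+                y=torch.zeros(num_nodes, dtype=torch.long))
+    masked, cols = mask_features_global(demo, mask_ratio=0.25, seed=0)
+    assert (masked.x[:, cols] == 0).all()
+    return cols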
+
+def train_with_soft_labels(model, data, train_mask, soft_targets, epochs=200,
+                           lr=0.01, device="cpu"):
+    """Distill the target: KL between the student's log-probs and the target's probs."""
+    model = model.to(device); data = data.to(device)
+    soft_targets = soft_targets.to(device)
+    opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4)
+    for _ in range(epochs):
+        model.train(); opt.zero_grad()
+        # kl_div expects log-probabilities as input and probabilities as target
+        out = F.log_softmax(model(data.x, data.edge_index), dim=1)
+        loss = F.kl_div(out[train_mask], soft_targets[train_mask], reduction='batchmean')
+        loss.backward(); opt.step()
+    return model
+
+def compute_accuracy(model, data, mask):
+    model.eval()
+    with torch.no_grad():
+        logits = model(data.x, data.edge_index)
+        pred = logits.argmax(dim=1)
+        return (pred[mask] == data.y[mask]).float().mean().item() * 100
+
+def compute_fidelity(model, target, data, mask):
+    """Fraction of nodes (in percent) on which `model` agrees with `target`."""
+    model.eval(); target.eval()
+    with torch.no_grad():
+        pred_m = model(data.x, data.edge_index).argmax(dim=1)
+        pred_t = target(data.x, data.edge_index).argmax(dim=1)
+        return (pred_m[mask] == pred_t[mask]).float().mean().item() * 100
+
+# ----------------------------
+# Holistic vectors & C_own
+# ----------------------------
+def model_to_vector_probs(model, data, node_order=None):
+    """Flatten a model's per-node posteriors (in a fixed node order) into one vector."""
+    model.eval()
+    with torch.no_grad():
+        probs = F.softmax(model(data.x, data.edge_index), dim=1).cpu()
+    if node_order is None:
+        node_order = torch.arange(probs.size(0))
+    return probs[node_order].reshape(-1).numpy()
+
+class COwn(nn.Module):
+    """Ownership classifier: holistic vector -> {0: independent, 1: surrogate}."""
+    def __init__(self, input_dim):
+        super().__init__()
+        self.net = nn.Sequential(
+            nn.Linear(input_dim, 128), nn.ReLU(),
+            nn.Linear(128, 64), nn.ReLU(),
+            nn.Linear(64, 2)
+        )
+
+    def forward(self, x):
+        return self.net(x)
+
+# ----------------------------
+# Settings mapping (I–IV)
+# ----------------------------
+def get_setting_architectures(setting):
+    """Map setting I–IV to (surrogate archs, independent archs, surrogate depth, independent depth)."""
+    overlapping, disjoint = ["GCN", "GAT", "GraphSAGE"], ["GIN", "SGC"]
+    l_same, l_diff = 2, 3
+    if setting == "I":
+        Fs, Find, lFs, lFind = overlapping, overlapping, l_same, l_same
+    elif setting == "II":
+        Fs, Find, lFs, lFind = overlapping, overlapping, l_diff, l_same
+    elif setting == "III":
+        Fs, Find, lFs, lFind = disjoint, overlapping, l_same, l_same
+    elif setting == "IV":
+        Fs, Find, lFs, lFind = disjoint, overlapping, l_diff, l_same
+    else:
+        raise ValueError("Invalid setting")
+    return Fs, Find, lFs, lFind
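+
+# Illustrative sketch (not part of the original script): at verification time
+# a suspect model is mapped to its holistic vector and classified by a trained
+# C_own. `classify_suspect` is a hypothetical helper; it assumes `cown` was
+# trained on vectors built with the same `data` and `node_order`.
+def classify_suspect(cown, suspect, data, node_order, device="cpu"):
+    vec = model_to_vector_probs(suspect, data, node_order)
+    x = torch.tensor(vec, dtype=torch.float32, device=device).unsqueeze(0)
+    cown.eval()
+    with torch.no_grad():
+        pred = cown(x).argmax(dim=1).item()
+    return pred  # 1 = surrogate (stolen), 0 = independent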
+
+# ----------------------------
+# Main experiment (Table 5 / Fig 3)
+# ----------------------------
+def run_table5_full(dataset_name, setting="I", inductive=False, device="cpu"):
+    # NOTE: `inductive` currently only labels the output rows; both passes run
+    # the same transductive pipeline.
+    data_orig, dataset = load_dataset(dataset_name, device=device)
+    in_dim, out_dim = dataset.num_features, dataset.num_classes
+    Fs, Find, lFs, lFind = get_setting_architectures(setting)
+
+    results = []
+    for seed in SEEDS:
+        set_seed(seed)
+        splits = split_nodes(data_orig.num_nodes, seed=seed)
+        # holistic vectors are built from posteriors on the training nodes
+        node_order = torch.where(splits["train"])[0]
+
+        # baseline target on the unmasked graph
+        base_model = build_model("GCN", in_dim, out_dim, 2)
+        base_model = train_model(base_model, data_orig, splits["train"],
+                                 epochs=MODEL_TRAIN_EPOCHS, device=device)
+        base_acc = compute_accuracy(base_model, data_orig, splits["dtest"])
+
+        for mask_ratio in MASK_RATIOS:
+            data_masked, _ = mask_features_global(data_orig, mask_ratio, seed=seed)
+
+            # train masked target
+            tgt = build_model("GCN", in_dim, out_dim, 2)
+            tgt = train_model(tgt, data_masked, splits["train"], epochs=MODEL_TRAIN_EPOCHS, device=device)
+            tgt_acc = compute_accuracy(tgt, data_masked, splits["dtest"])
+            drop = base_acc - tgt_acc
+            print(f"[{dataset_name}-{setting}-seed{seed}] Mask={mask_ratio:.2f}, acc={tgt_acc:.2f}, drop={drop:.2f}")
+
+            # Independents: trained on the true labels
+            indep_vecs, indep_accs = [], []
+            for arch in Find:
+                for j in range(NUM_INDEP):
+                    m = build_model(arch, in_dim, out_dim, lFind)
+                    m = train_model(m, data_masked, splits["train"], epochs=MODEL_TRAIN_EPOCHS, device=device)
+                    indep_accs.append(compute_accuracy(m, data_masked, splits["dtest"]))
+                    indep_vecs.append(model_to_vector_probs(m, data_masked, node_order))
+
+            # Surrogates: distilled from the masked target's soft labels
+            with torch.no_grad():
+                soft_all = F.softmax(tgt(data_masked.x, data_masked.edge_index), dim=1).cpu()
+
+            surr_vecs, surr_accs, surr_fids = [], [], []
+            for arch in Fs:
+                for j in range(NUM_SURR):
+                    m = build_model(arch, in_dim, out_dim, lFs)
+                    m = train_with_soft_labels(m, data_masked, splits["train"], soft_all,
+                                               epochs=SURR_TRAIN_EPOCHS, device=device)
+                    surr_accs.append(compute_accuracy(m, data_masked, splits["dtest"]))
+                    surr_fids.append(compute_fidelity(m, tgt, data_masked, splits["dtest"]))
+                    surr_vecs.append(model_to_vector_probs(m, data_masked, node_order))
+
+            # Ownership classifier (full-batch training for stability)
+            X = np.vstack(indep_vecs + surr_vecs)
+            y = np.array([0] * len(indep_vecs) + [1] * len(surr_vecs))
+            X_train, X_test, y_train, y_test = train_test_split(
+                X, y, test_size=0.3, stratify=y, random_state=seed
+            )
+            cown = COwn(X.shape[1]).to(device)
+            opt = torch.optim.Adam(cown.parameters(), lr=0.001, weight_decay=1e-4)
+            X_train_t = torch.tensor(X_train, dtype=torch.float32, device=device)
+            y_train_t = torch.tensor(y_train, dtype=torch.long, device=device)
+            X_test_t = torch.tensor(X_test, dtype=torch.float32, device=device)
+
+            for epoch in range(COWN_TRAIN_EPOCHS):
+                cown.train()
+                out = cown(X_train_t)
+                loss = F.cross_entropy(out, y_train_t)
+                opt.zero_grad(); loss.backward(); opt.step()
+
+            cown.eval()
+            with torch.no_grad():
+                preds = cown(X_test_t).argmax(dim=1).cpu().numpy()
+                c_acc = (preds == y_test).mean() * 100
+            print(f"[{dataset_name}-{setting}-seed{seed}] C_own acc={c_acc:.2f}")
+
+            # save one row per (seed, mask_ratio)
+            results.append({
+                "dataset": dataset_name,
+                "setting": setting,
+                "mode": "Inductive" if inductive else "Transductive",
+                "seed": seed,
+                "mask_ratio": mask_ratio,
+                "target_acc": tgt_acc,
+                "indep_acc_mean": np.mean(indep_accs),
+                "surr_acc_mean": np.mean(surr_accs),
+                "surr_fid_mean": np.mean(surr_fids),
+                "cown_acc": c_acc
+            })
+
+    return pd.DataFrame(results)
+
+# ----------------------------
+# Driver
+# ----------------------------
+if __name__ == "__main__":
+    os.makedirs("results", exist_ok=True)
+    datasets = ["Cora", "CiteSeer", "PubMed", "Amazon", "dblp"]
+    settings = ["I", "II", "III", "IV"]
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    all_results = []
+
+    for ds in datasets:
+        for st in settings:
+            for mode in [False, True]:  # transductive=False, inductive=True
+                df = run_table5_full(dataset_name=ds, setting=st, inductive=mode, device=device)
+                all_results.append(df)
+
+    all_results = pd.concat(all_results, ignore_index=True)
+    all_results.to_csv("results/all_results_per_seed.csv", index=False)
+
+    # --- Aggregation for analyze_tables_extended.py ---
+    agg = all_results.groupby(["dataset", "setting", "mode"]).agg({
+        "target_acc": ["mean", "std"],
+        "indep_acc_mean": ["mean", "std"],
+        "surr_acc_mean": ["mean", "std"],
+        "surr_fid_mean": ["mean", "std"],
+        "cown_acc": ["mean", "std"]
+    }).reset_index()
+
+    agg.columns = [
+        "dataset", "setting", "mode",
+        "target_acc_mean", "target_acc_std",
+        "indep_acc_mean", "indep_acc_std",
+        "surr_acc_mean", "surr_acc_std",
+        "surr_fid_mean", "surr_fid_std",
+        "cown_acc_mean", "cown_acc_std"
+    ]
+    agg.to_csv("results/table5_all_results.csv", index=False)

+    print("✅ Saved results/all_results_per_seed.csv and results/table5_all_results.csv (aggregated)")
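+
+# Usage note (sketch): downstream analysis can rebuild a Table 5-style view
+# from the aggregated CSV, e.g.
+#
+#   agg = pd.read_csv("results/table5_all_results.csv")
+#   table5 = agg.pivot_table(index=["dataset", "setting"], columns="mode",
+#                            values=["cown_acc_mean", "surr_fid_mean"])
+#   print(table5.round(2))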