From ad703c433003631ad27dea5cf3d962bf6a7b2be0 Mon Sep 17 00:00:00 2001
From: luoluomei
Date: Thu, 10 Jul 2025 11:25:50 -0500
Subject: [PATCH] Add LinkStealAttack

---
 example.py                         | 39 ++++-
 models/attack/link_steal_attack.py | 225 +++++++++++++++++++++++++++++
 2 files changed, 259 insertions(+), 5 deletions(-)
 create mode 100644 models/attack/link_steal_attack.py

diff --git a/example.py b/example.py
index 4b47ebd..fa83023 100644
--- a/example.py
+++ b/example.py
@@ -1,3 +1,12 @@
+# -*- coding: utf-8 -*-
+"""example
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1l5EnFeHIv9HsfdSafJYwXSe_x19fQZcu
+"""
+
 from datasets import Cora
 
 dataset = Cora()
@@ -28,9 +37,29 @@
 
 
 # >>>>>>>>>> test SurviveWM2
-from models.defense.SurviveWM2 import OptimizedWatermarkDefense
-from datasets import ENZYMES
+# from models.defense.SurviveWM2 import OptimizedWatermarkDefense
+# from datasets import ENZYMES
+
+# dataset = ENZYMES()
+# defense = OptimizedWatermarkDefense(dataset, 0.25)
+# defense.defend()
+
+
+# >>>>>>>>>> test Link Steal Attack
+from datasets import Cora
+from models.attack.link_steal_attack import LinkStealAttack
+
+dataset = Cora()
+
+attack = LinkStealAttack(dataset)
+attack.configure(
+    shadow_datasets=["dblp"],
+    shadow_models=["graphsage"],
+    attack_ids=[1],  # 1-hop attack
+    props=[100],
+    seed_num=3,
+    gpu=0
+)
 
-dataset = ENZYMES()
-defense = OptimizedWatermarkDefense(dataset, 0.25)
-defense.defend()
+results = attack.attack()
+print(results)
\ No newline at end of file
diff --git a/models/attack/link_steal_attack.py b/models/attack/link_steal_attack.py
new file mode 100644
index 0000000..cb17e26
--- /dev/null
+++ b/models/attack/link_steal_attack.py
@@ -0,0 +1,225 @@
+# -*- coding: utf-8 -*-
+"""link_steal_attack.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1oF3LAAIl38eGzLVzO-4mGcs2xi7taqKm
+"""
+
+from pygip.attacks.base import BaseAttack
+import os
+import subprocess
+import pandas as pd
+import time
+import random
+import shutil
+
+class LinkStealAttack(BaseAttack):
+    """
+    Link Steal Attack integrates external attack scripts (train_gnn.py and mlp_attack.py) into the PyGIP framework.
+
+    Note: `attack_node_fraction` is required by BaseAttack but not used in this class. Instead, use `configure()` to specify actual parameters.
+
+    Parameters:
+    - dataset: Dataset object from pygip.datasets
+    - attack_node_fraction: Required by BaseAttack, unused here (default=1.0)
+    - model_path: Optional pre-trained model path (not used here)
+    """
+    def __init__(self, dataset, attack_node_fraction=1.0, model_path=None):
+        super().__init__(dataset, attack_node_fraction, model_path)
+        self.shadow_datasets = []  # list[str]: names of shadow datasets (e.g., ['dblp', 'amazon_photo'])
+        self.shadow_models = []  # list[str]: list of GNN model names to use (e.g., ['graphsage'])
+        self.attack_ids = []  # list[int]: list of attack IDs (0–9) representing attack strategies
+        self.props = [int(attack_node_fraction * 100)]  # list[int]: proportions (%) of shadow dataset used
+        self.seed_num = 5  # int: number of random seeds to repeat experiments
+        self.gpu = 0  # int: GPU index (default 0)
+
+    def configure(self,
+                  shadow_datasets=None,
+                  shadow_models=None,
+                  attack_ids=None,
+                  props=None,
+                  seed_num=None,
+                  gpu=None):
+        """
+        Configure parameters for the attack.
+
+        Parameters:
+        - shadow_datasets: list[str], names of shadow datasets
+        - shadow_models: list[str], model types (e.g., ['graphsage'])
+        - attack_ids: list[int], attack strategy IDs (0–9)
+        - props: list[int], proportions (%) of shadow dataset used
+        - seed_num: int, number of seeds to use for repeated runs
+        - gpu: int, GPU index to use (default=0)
+        """
+        if shadow_datasets: self.shadow_datasets = shadow_datasets
+        if shadow_models: self.shadow_models = shadow_models
+        if attack_ids: self.attack_ids = attack_ids
+        if props: self.props = props
+        if seed_num is not None: self.seed_num = seed_num
+        if gpu is not None: self.gpu = gpu
+
+    def attack(self):
+        """
+        Execute the Link Steal attack process by:
+        1. Training target and shadow models (if not cached)
+        2. Running external MLP attacks using various configurations
+        3. Aggregating and returning AUC results
+
+        Returns:
+        - all_results: list of tuples with summary (target, shadow, model, attack_id, prop, avg_auc)
+        """
+        model_dir = "./data/save_model/gnn"
+        os.makedirs(model_dir, exist_ok=True)
+        log_path = "./output/logs/attack_performance.txt"
+        result_dir = "./output/results"
+        os.makedirs(result_dir, exist_ok=True)
+
+        seeds = random.sample(range(10000), self.seed_num)
+
+        target_dataset = self.dataset.name
+        shadow_datasets = self.shadow_datasets
+        shadow_models = self.shadow_models
+        attack_ids = self.attack_ids
+        props = self.props
+
+        print("========== Attack Configuration ==========")
+        print(f"Target Dataset : {target_dataset}")
+        print(f"Shadow Datasets : {shadow_datasets}")
+        print(f"Shadow Models : {shadow_models}")
+        print(f"Attack IDs : {attack_ids}")
+        print(f"Shadow Proportions : {props}")
+        print(f"Random Seeds : {seeds}")
+        print("==========================================")
+
+        target_model_path = os.path.join(model_dir, f"inductive_{target_dataset}_graphsage_target.pth")
+        if not os.path.exists(target_model_path):
+            print(f"Training target model for {target_dataset}")
+            subprocess.run([
+                "python", "train_gnn.py",
+                "--dataset", target_dataset,
+                "--model", "graphsage",
+                "--mode", "target",
+                "--gpu", str(self.gpu)
+            ], check=True)
+        else:
+            print(f"Target model already exists: {target_model_path}")
+
+        attack_args_map = {
+            0: ["--node_topology", "0-hop"],
+            1: ["--node_topology", "1-hop"],
+            2: ["--node_topology", "2-hop"],
+            3: ["--node_topology", "0-hop", "--plus"],
+            4: ["--node_topology", "1-hop", "--plus"],
+            5: ["--node_topology", "2-hop", "--plus"],
+            6: ["--node_topology", "1-hop", "--plus2"],
+            7: ["--node_topology", "2-hop", "--plus2"],
+            8: ["--node_topology", "1-hop", "--all"],
+            9: ["--node_topology", "2-hop", "--all"],
+        }
+
+        method_map = {
+            0: "0-hop_posteriors",
+            1: "1-hop_posteriors",
+            2: "2-hop_posteriors",
+            3: "0-hop_posteriors_node",
+            4: "1-hop_posteriors_node",
+            5: "2-hop_posteriors_node",
+            6: "1-hop_posteriors_graph",
+            7: "2-hop_posteriors_graph",
+            8: "1-hop_posteriors_node_graph",
+            9: "2-hop_posteriors_node_graph",
+        }
+
+        all_results = []
+
+        for shadow_dataset in shadow_datasets:
+            for shadow_model in shadow_models:
+                for prop in props:
+                    shadow_model_path = os.path.join(model_dir, f"inductive_{shadow_dataset}_{shadow_model}_shadow{prop}.pth")
+                    if not os.path.exists(shadow_model_path):
+                        print(f"Training shadow model {shadow_model} for {shadow_dataset} (prop={prop})")
+                        subprocess.run([
+                            "python", "train_gnn.py",
+                            "--dataset", shadow_dataset,
+                            "--model", shadow_model,
+                            "--mode", "shadow",
+                            "--gpu", str(self.gpu),
+                            "--prop", str(prop)
+                        ], check=True)
+                    else:
+                        print(f"Shadow model already exists: {shadow_model_path}")
+
+                    compatible_shadow_model_path = os.path.join(model_dir, f"inductive_{target_dataset}_{shadow_model}_shadow{prop}.pth")
+                    if not os.path.exists(compatible_shadow_model_path):
+                        shutil.copy(shadow_model_path, compatible_shadow_model_path)
+
+                    for attack_id in attack_ids:
+                        print(f"\n--- Running Attack-{attack_id} ---")
+                        print(f"Target Dataset : {target_dataset}")
+                        print(f"Shadow Dataset : {shadow_dataset}")
+                        print(f"Shadow Model : {shadow_model}")
+                        print(f"Shadow Prop (%) : {prop}")
+                        print(f"Attack Method : {method_map[attack_id]}")
+                        print("------------------------------------------")
+
+                        result_path = os.path.join(result_dir, f"attack{attack_id}_summary.csv")
+                        with open(log_path, "w") as f:
+                            pass
+
+                        aucs = []
+                        for seed in seeds:
+                            print(f"Running seed {seed}")
+                            cmd = [
+                                "python", "mlp_attack.py",
+                                "--dataset", target_dataset,
+                                "--edge_feature", "all",
+                                "--target_model", "graphsage",
+                                "--shadow_model", shadow_model,
+                                "--lr", "0.006",
+                                "--optim", "adam",
+                                "--scheduler",
+                                "--gpu", str(self.gpu),
+                                "--seed", str(seed),
+                                "--prop", str(prop)
+                            ] + attack_args_map[attack_id]
+
+                            try:
+                                subprocess.run(cmd, check=True)
+                                time.sleep(1)
+
+                                with open(log_path, "r") as f:
+                                    lines = f.readlines()
+                                matched = [
+                                    line for line in lines
+                                    if target_dataset in line and str(seed) in line and method_map[attack_id] in line
+                                ]
+                                if matched:
+                                    fields = matched[-1].strip().split(",")
+                                    test_auc = float(fields[11])
+                                    aucs.append(test_auc)
+                                    print(f"AUC = {test_auc:.4f}")
+                                else:
+                                    print(f"No matching log for seed {seed}")
+                            except subprocess.CalledProcessError:
+                                print(f"Error running mlp_attack.py for seed {seed}")
+
+                        if aucs:
+                            avg_auc = round(sum(aucs) / len(aucs), 4)
+                            print(f"Average AUC: {avg_auc:.4f}")
+                            df = pd.DataFrame({
+                                "target_dataset": [target_dataset] * len(aucs) + [target_dataset],
+                                "shadow_dataset": [shadow_dataset] * len(aucs) + [shadow_dataset],
+                                "shadow_model": [shadow_model] * len(aucs) + [shadow_model],
+                                "attack_id": [attack_id] * len(aucs) + [attack_id],
+                                "prop": [prop] * len(aucs) + [prop],
+                                "seed": seeds + ["avg"],
+                                "test_auc": aucs + [avg_auc]
+                            })
+                            df.to_csv(result_path, mode="a", index=False, header=not os.path.exists(result_path))
+                            all_results.append((target_dataset, shadow_dataset, shadow_model, attack_id, prop, avg_auc))
+                        else:
+                            print(f"No AUCs recorded for Attack-{attack_id} with shadow {shadow_dataset} model {shadow_model} (prop {prop})")
+
+        return all_results
\ No newline at end of file