From ad703c433003631ad27dea5cf3d962bf6a7b2be0 Mon Sep 17 00:00:00 2001
From: luoluomei
Date: Thu, 10 Jul 2025 11:25:50 -0500
Subject: [PATCH] Add LinkStealAttack

---
 example.py                         | 39 ++++-
 models/attack/link_steal_attack.py | 225 +++++++++++++++++++++++++++++
 2 files changed, 259 insertions(+), 5 deletions(-)
 create mode 100644 models/attack/link_steal_attack.py

diff --git a/example.py b/example.py
index 4b47ebd..fa83023 100644
--- a/example.py
+++ b/example.py
@@ -1,3 +1,12 @@
+# -*- coding: utf-8 -*-
+"""example
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1l5EnFeHIv9HsfdSafJYwXSe_x19fQZcu
+"""
+
 from datasets import Cora
 
 dataset = Cora()
@@ -28,9 +37,29 @@
 
 
 # >>>>>>>>>> test SurviveWM2
-from models.defense.SurviveWM2 import OptimizedWatermarkDefense
-from datasets import ENZYMES
+# from models.defense.SurviveWM2 import OptimizedWatermarkDefense
+# from datasets import ENZYMES
+
+# dataset = ENZYMES()
+# defense = OptimizedWatermarkDefense(dataset, 0.25)
+# defense.defend()
+
+
+# >>>>>>>>>> test Link Steal Attack
+from datasets import Cora
+from models.attack.link_steal_attack import LinkStealAttack
+
+dataset = Cora()
+
+attack = LinkStealAttack(dataset)
+attack.configure(
+    shadow_datasets=["dblp"],
+    shadow_models=["graphsage"],
+    attack_ids=[1],  # 1-hop attack
+    props=[100],
+    seed_num=3,
+    gpu=0
+)
 
-dataset = ENZYMES()
-defense = OptimizedWatermarkDefense(dataset, 0.25)
-defense.defend()
+results = attack.attack()
+print(results)
\ No newline at end of file
diff --git a/models/attack/link_steal_attack.py b/models/attack/link_steal_attack.py
new file mode 100644
index 0000000..cb17e26
--- /dev/null
+++ b/models/attack/link_steal_attack.py
@@ -0,0 +1,225 @@
+# -*- coding: utf-8 -*-
+"""link_steal_attack.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1oF3LAAIl38eGzLVzO-4mGcs2xi7taqKm
+"""
+
+from pygip.attacks.base import BaseAttack
+import os
+import subprocess
+import pandas as pd
+import time
+import random
+import shutil
+
+class LinkStealAttack(BaseAttack):
+    """
+    Link Steal Attack integrates external attack scripts (train_gnn.py and mlp_attack.py) into the PyGIP framework.
+
+    Note: `attack_node_fraction` is required by BaseAttack but not used in this class. Instead, use `configure()` to specify actual parameters.
+
+    Parameters:
+    - dataset: Dataset object from pygip.datasets
+    - attack_node_fraction: Required by BaseAttack, unused here (default=1.0)
+    - model_path: Optional pre-trained model path (not used here)
+    """
+    def __init__(self, dataset, attack_node_fraction=1.0, model_path=None):
+        super().__init__(dataset, attack_node_fraction, model_path)
+        self.shadow_datasets = []  # list[str]: names of shadow datasets (e.g., ['dblp', 'amazon_photo'])
+        self.shadow_models = []  # list[str]: list of GNN model names to use (e.g., ['graphsage'])
+        self.attack_ids = []  # list[int]: list of attack IDs (0–9) representing attack strategies
+        self.props = [int(attack_node_fraction * 100)]  # list[int]: proportions (%) of shadow dataset used
+        self.seed_num = 5  # int: number of random seeds to repeat experiments
+        self.gpu = 0  # int: GPU index (default 0)
+
+    def configure(self,
+                  shadow_datasets=None,
+                  shadow_models=None,
+                  attack_ids=None,
+                  props=None,
+                  seed_num=None,
+                  gpu=None):
+        """
+        Configure parameters for the attack.
+
+        Parameters:
+        - shadow_datasets: list[str], names of shadow datasets
+        - shadow_models: list[str], model types (e.g., ['graphsage'])
+        - attack_ids: list[int], attack strategy IDs (0–9)
+        - props: list[int], proportions (%) of shadow dataset used
+        - seed_num: int, number of seeds to use for repeated runs
+        - gpu: int, GPU index to use (default=0)
+        """
+        if shadow_datasets: self.shadow_datasets = shadow_datasets
+        if shadow_models: self.shadow_models = shadow_models
+        if attack_ids: self.attack_ids = attack_ids
+        if props: self.props = props
+        if seed_num is not None: self.seed_num = seed_num
+        if gpu is not None: self.gpu = gpu
+
+    def attack(self):
+        """
+        Execute the Link Steal attack process by:
+        1. Training target and shadow models (if not cached)
+        2. Running external MLP attacks using various configurations
+        3. Aggregating and returning AUC results
+
+        Returns:
+        - all_results: list of tuples with summary (target, shadow, model, attack_id, prop, avg_auc)
+        """
+        model_dir = "./data/save_model/gnn"
+        os.makedirs(model_dir, exist_ok=True)
+        log_path = "./output/logs/attack_performance.txt"
+        result_dir = "./output/results"
+        os.makedirs(result_dir, exist_ok=True)
+
+        seeds = random.sample(range(10000), self.seed_num)
+
+        target_dataset = self.dataset.name
+        shadow_datasets = self.shadow_datasets
+        shadow_models = self.shadow_models
+        attack_ids = self.attack_ids
+        props = self.props
+
+        print("========== Attack Configuration ==========")
+        print(f"Target Dataset : {target_dataset}")
+        print(f"Shadow Datasets : {shadow_datasets}")
+        print(f"Shadow Models : {shadow_models}")
+        print(f"Attack IDs : {attack_ids}")
+        print(f"Shadow Proportions : {props}")
+        print(f"Random Seeds : {seeds}")
+        print("==========================================")
+
+        target_model_path = os.path.join(model_dir, f"inductive_{target_dataset}_graphsage_target.pth")
+        if not os.path.exists(target_model_path):
+            print(f"Training target model for {target_dataset}")
+            subprocess.run([
+                "python", "train_gnn.py",
+                "--dataset", target_dataset,
+                "--model", "graphsage",
+                "--mode", "target",
+                "--gpu", str(self.gpu)
+            ], check=True)
+        else:
+            print(f"Target model already exists: {target_model_path}")
+
+        attack_args_map = {
+            0: ["--node_topology", "0-hop"],
+            1: ["--node_topology", "1-hop"],
+            2: ["--node_topology", "2-hop"],
+            3: ["--node_topology", "0-hop", "--plus"],
+            4: ["--node_topology", "1-hop", "--plus"],
+            5: ["--node_topology", "2-hop", "--plus"],
+            6: ["--node_topology", "1-hop", "--plus2"],
+            7: ["--node_topology", "2-hop", "--plus2"],
+            8: ["--node_topology", "1-hop", "--all"],
+            9: ["--node_topology", "2-hop", "--all"],
+        }
+
+        method_map = {
+            0: "0-hop_posteriors",
+            1: "1-hop_posteriors",
+            2: "2-hop_posteriors",
+            3: "0-hop_posteriors_node",
+            4: "1-hop_posteriors_node",
+            5: "2-hop_posteriors_node",
+            6: "1-hop_posteriors_graph",
+            7: "2-hop_posteriors_graph",
+            8: "1-hop_posteriors_node_graph",
+            9: "2-hop_posteriors_node_graph",
+        }
+
+        all_results = []
+
+        for shadow_dataset in shadow_datasets:
+            for shadow_model in shadow_models:
+                for prop in props:
+                    shadow_model_path = os.path.join(model_dir, f"inductive_{shadow_dataset}_{shadow_model}_shadow{prop}.pth")
+                    if not os.path.exists(shadow_model_path):
+                        print(f"Training shadow model {shadow_model} for {shadow_dataset} (prop={prop})")
+                        subprocess.run([
+                            "python", "train_gnn.py",
+                            "--dataset", shadow_dataset,
+                            "--model", shadow_model,
+                            "--mode", "shadow",
+                            "--gpu", str(self.gpu),
+                            "--prop", str(prop)
+                        ], check=True)
+                    else:
+                        print(f"Shadow model already exists: {shadow_model_path}")
+
+                    compatible_shadow_model_path = os.path.join(model_dir, f"inductive_{target_dataset}_{shadow_model}_shadow{prop}.pth")
+                    if not os.path.exists(compatible_shadow_model_path):
+                        shutil.copy(shadow_model_path, compatible_shadow_model_path)
+
+                    for attack_id in attack_ids:
+                        print(f"\n--- Running Attack-{attack_id} ---")
+                        print(f"Target Dataset : {target_dataset}")
+                        print(f"Shadow Dataset : {shadow_dataset}")
+                        print(f"Shadow Model : {shadow_model}")
+                        print(f"Shadow Prop (%) : {prop}")
+                        print(f"Attack Method : {method_map[attack_id]}")
+                        print("------------------------------------------")
+
+                        result_path = os.path.join(result_dir, f"attack{attack_id}_summary.csv")
+                        with open(log_path, "w") as f:
+                            pass
+
+                        aucs = []
+                        for seed in seeds:
+                            print(f"Running seed {seed}")
+                            cmd = [
+                                "python", "mlp_attack.py",
+                                "--dataset", target_dataset,
+                                "--edge_feature", "all",
+                                "--target_model", "graphsage",
+                                "--shadow_model", shadow_model,
+                                "--lr", "0.006",
+                                "--optim", "adam",
+                                "--scheduler",
+                                "--gpu", str(self.gpu),
+                                "--seed", str(seed),
+                                "--prop", str(prop)
+                            ] + attack_args_map[attack_id]
+
+                            try:
+                                subprocess.run(cmd, check=True)
+                                time.sleep(1)
+
+                                with open(log_path, "r") as f:
+                                    lines = f.readlines()
+                                matched = [
+                                    line for line in lines
+                                    if target_dataset in line and str(seed) in line and method_map[attack_id] in line
+                                ]
+                                if matched:
+                                    fields = matched[-1].strip().split(",")
+                                    test_auc = float(fields[11])
+                                    aucs.append(test_auc)
+                                    print(f"AUC = {test_auc:.4f}")
+                                else:
+                                    print(f"No matching log for seed {seed}")
+                            except subprocess.CalledProcessError:
+                                print(f"Error running mlp_attack.py for seed {seed}")
+
+                        if aucs:
+                            avg_auc = round(sum(aucs) / len(aucs), 4)
+                            print(f"Average AUC: {avg_auc:.4f}")
+                            df = pd.DataFrame({
+                                "target_dataset": [target_dataset] * len(aucs) + [target_dataset],
+                                "shadow_dataset": [shadow_dataset] * len(aucs) + [shadow_dataset],
+                                "shadow_model": [shadow_model] * len(aucs) + [shadow_model],
+                                "attack_id": [attack_id] * len(aucs) + [attack_id],
+                                "prop": [prop] * len(aucs) + [prop],
+                                "seed": seeds + ["avg"],
+                                "test_auc": aucs + [avg_auc]
+                            })
+                            df.to_csv(result_path, mode="a", index=False, header=not os.path.exists(result_path))
+                            all_results.append((target_dataset, shadow_dataset, shadow_model, attack_id, prop, avg_auc))
+                        else:
+                            print(f"No AUCs recorded for Attack-{attack_id} with shadow {shadow_dataset} model {shadow_model} (prop {prop})")
+
+        return all_results
\ No newline at end of file