Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 34 additions & 5 deletions example.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
# -*- coding: utf-8 -*-
"""example

Automatically generated by Colab.

Original file is located at
https://colab.research.google.com/drive/1l5EnFeHIv9HsfdSafJYwXSe_x19fQZcu
"""

# Load the Cora citation dataset via the project's dataset wrappers.
from datasets import Cora

dataset = Cora()
Expand Down Expand Up @@ -28,9 +37,29 @@


# >>>>>>>>>> test SurviveWM2
from models.defense.SurviveWM2 import OptimizedWatermarkDefense
from datasets import ENZYMES
# from models.defense.SurviveWM2 import OptimizedWatermarkDefense
# from datasets import ENZYMES

# dataset = ENZYMES()
# defense = OptimizedWatermarkDefense(dataset, 0.25)
# defense.defend()


# >>>>>>>>>> test Link Steal Attack
from datasets import Cora
from pygip.attacks.link_steal_attack import LinkStealAttack

dataset = Cora()

attack = LinkStealAttack(dataset)
attack.configure(
shadow_datasets=["dblp"],
shadow_models=["graphsage"],
attack_ids=[1], # 1-hop attack
props=[100],
seed_num=3,
gpu=0
)

dataset = ENZYMES()
defense = OptimizedWatermarkDefense(dataset, 0.25)
defense.defend()
results = attack.attack()
print(results)
225 changes: 225 additions & 0 deletions models/attack/link_steal_attack.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
# -*- coding: utf-8 -*-
"""link_steal_attack.ipynb

Automatically generated by Colab.

Original file is located at
https://colab.research.google.com/drive/1oF3LAAIl38eGzLVzO-4mGcs2xi7taqKm
"""

import os
import random
import shutil
import subprocess
import sys
import time

import pandas as pd

from pygip.attacks.base import BaseAttack

class LinkStealAttack(BaseAttack):
    """
    Link Steal Attack: drives the external scripts ``train_gnn.py`` and
    ``mlp_attack.py`` from the PyGIP framework, then parses and aggregates
    the per-run AUC numbers those scripts write to a shared log file.

    Note: ``attack_node_fraction`` is required by BaseAttack; here it only
    seeds the default shadow proportion. Use ``configure()`` to set the
    actual experiment parameters.

    Parameters:
    - dataset: Dataset object from pygip.datasets
    - attack_node_fraction: required by BaseAttack; only used to derive the
      default ``props`` value (default=1.0 -> props=[100])
    - model_path: optional pre-trained model path (not used here)
    """

    # Attack id -> extra CLI flags forwarded to mlp_attack.py.
    _ATTACK_ARGS_MAP = {
        0: ["--node_topology", "0-hop"],
        1: ["--node_topology", "1-hop"],
        2: ["--node_topology", "2-hop"],
        3: ["--node_topology", "0-hop", "--plus"],
        4: ["--node_topology", "1-hop", "--plus"],
        5: ["--node_topology", "2-hop", "--plus"],
        6: ["--node_topology", "1-hop", "--plus2"],
        7: ["--node_topology", "2-hop", "--plus2"],
        8: ["--node_topology", "1-hop", "--all"],
        9: ["--node_topology", "2-hop", "--all"],
    }

    # Attack id -> method label used to match result rows in the log file.
    _METHOD_MAP = {
        0: "0-hop_posteriors",
        1: "1-hop_posteriors",
        2: "2-hop_posteriors",
        3: "0-hop_posteriors_node",
        4: "1-hop_posteriors_node",
        5: "2-hop_posteriors_node",
        6: "1-hop_posteriors_graph",
        7: "2-hop_posteriors_graph",
        8: "1-hop_posteriors_node_graph",
        9: "2-hop_posteriors_node_graph",
    }

    def __init__(self, dataset, attack_node_fraction=1.0, model_path=None):
        super().__init__(dataset, attack_node_fraction, model_path)
        self.shadow_datasets = []  # list[str]: shadow dataset names (e.g., ['dblp', 'amazon_photo'])
        self.shadow_models = []    # list[str]: GNN model names to use (e.g., ['graphsage'])
        self.attack_ids = []       # list[int]: attack IDs (0-9) selecting an attack strategy
        self.props = [int(attack_node_fraction * 100)]  # list[int]: shadow dataset proportions (%)
        self.seed_num = 5          # int: number of random seeds per configuration
        self.gpu = 0               # int: GPU index (default 0)

    def configure(self,
                  shadow_datasets=None,
                  shadow_models=None,
                  attack_ids=None,
                  props=None,
                  seed_num=None,
                  gpu=None):
        """
        Configure parameters for the attack. Only the arguments you pass are
        updated; the rest keep their current values.

        Parameters:
        - shadow_datasets: list[str], names of shadow datasets
        - shadow_models: list[str], model types (e.g., ['graphsage'])
        - attack_ids: list[int], attack strategy IDs (0-9)
        - props: list[int], proportions (%) of shadow dataset used
        - seed_num: int, number of seeds to use for repeated runs
        - gpu: int, GPU index to use (default=0)
        """
        # List arguments use truthiness on purpose: an empty list is treated
        # the same as "not provided" and leaves the current value in place.
        if shadow_datasets: self.shadow_datasets = shadow_datasets
        if shadow_models: self.shadow_models = shadow_models
        if attack_ids: self.attack_ids = attack_ids
        if props: self.props = props
        # seed_num/gpu compare against None so 0 remains a valid value.
        if seed_num is not None: self.seed_num = seed_num
        if gpu is not None: self.gpu = gpu

    def attack(self):
        """
        Execute the Link Steal attack:

        1. Train target and shadow models via train_gnn.py (skipped if cached)
        2. Run mlp_attack.py for every (shadow dataset, shadow model, prop,
           attack id, seed) combination
        3. Parse per-run AUCs from the shared log file, write CSV summaries,
           and return the aggregated results

        Returns:
        - all_results: list of tuples
          (target_dataset, shadow_dataset, shadow_model, attack_id, prop, avg_auc)
        """
        model_dir = "./data/save_model/gnn"
        os.makedirs(model_dir, exist_ok=True)
        log_path = "./output/logs/attack_performance.txt"
        # Bug fix: the log directory was never created, so the later
        # open(log_path, "w") raised FileNotFoundError on a fresh checkout.
        os.makedirs(os.path.dirname(log_path), exist_ok=True)
        result_dir = "./output/results"
        os.makedirs(result_dir, exist_ok=True)

        seeds = random.sample(range(10000), self.seed_num)

        target_dataset = self.dataset.name

        print("========== Attack Configuration ==========")
        print(f"Target Dataset : {target_dataset}")
        print(f"Shadow Datasets : {self.shadow_datasets}")
        print(f"Shadow Models : {self.shadow_models}")
        print(f"Attack IDs : {self.attack_ids}")
        print(f"Shadow Proportions : {self.props}")
        print(f"Random Seeds : {seeds}")
        print("==========================================")

        self._ensure_target_model(model_dir, target_dataset)

        all_results = []

        for shadow_dataset in self.shadow_datasets:
            for shadow_model in self.shadow_models:
                for prop in self.props:
                    self._ensure_shadow_model(model_dir, target_dataset,
                                              shadow_dataset, shadow_model, prop)

                    for attack_id in self.attack_ids:
                        summary = self._run_single_attack(
                            log_path, result_dir, target_dataset,
                            shadow_dataset, shadow_model, prop,
                            attack_id, seeds)
                        if summary is not None:
                            all_results.append(summary)

        return all_results

    def _ensure_target_model(self, model_dir, target_dataset):
        """Train the target GNN via train_gnn.py if its checkpoint is missing."""
        target_model_path = os.path.join(
            model_dir, f"inductive_{target_dataset}_graphsage_target.pth")
        if os.path.exists(target_model_path):
            print(f"Target model already exists: {target_model_path}")
            return
        print(f"Training target model for {target_dataset}")
        # sys.executable keeps the child on the same interpreter/venv as us.
        subprocess.run([
            sys.executable, "train_gnn.py",
            "--dataset", target_dataset,
            "--model", "graphsage",
            "--mode", "target",
            "--gpu", str(self.gpu)
        ], check=True)

    def _ensure_shadow_model(self, model_dir, target_dataset,
                             shadow_dataset, shadow_model, prop):
        """Train the shadow GNN if missing and keep a copy under the target
        dataset's name (mlp_attack.py presumably looks it up there — TODO
        confirm against mlp_attack.py)."""
        shadow_model_path = os.path.join(
            model_dir, f"inductive_{shadow_dataset}_{shadow_model}_shadow{prop}.pth")
        if os.path.exists(shadow_model_path):
            print(f"Shadow model already exists: {shadow_model_path}")
        else:
            print(f"Training shadow model {shadow_model} for {shadow_dataset} (prop={prop})")
            subprocess.run([
                sys.executable, "train_gnn.py",
                "--dataset", shadow_dataset,
                "--model", shadow_model,
                "--mode", "shadow",
                "--gpu", str(self.gpu),
                "--prop", str(prop)
            ], check=True)

        compatible_shadow_model_path = os.path.join(
            model_dir, f"inductive_{target_dataset}_{shadow_model}_shadow{prop}.pth")
        if not os.path.exists(compatible_shadow_model_path):
            shutil.copy(shadow_model_path, compatible_shadow_model_path)

    def _run_single_attack(self, log_path, result_dir, target_dataset,
                           shadow_dataset, shadow_model, prop, attack_id, seeds):
        """Run one attack configuration over all seeds and append its CSV summary.

        Returns (target, shadow, model, attack_id, prop, avg_auc) on success,
        or None when no seed produced an AUC.
        """
        print(f"\n--- Running Attack-{attack_id} ---")
        print(f"Target Dataset : {target_dataset}")
        print(f"Shadow Dataset : {shadow_dataset}")
        print(f"Shadow Model : {shadow_model}")
        print(f"Shadow Prop (%) : {prop}")
        print(f"Attack Method : {self._METHOD_MAP[attack_id]}")
        print("------------------------------------------")

        result_path = os.path.join(result_dir, f"attack{attack_id}_summary.csv")
        # Truncate the shared log so we only parse rows from this configuration.
        with open(log_path, "w"):
            pass

        per_seed = []  # (seed, test_auc) for the seeds that succeeded
        for seed in seeds:
            print(f"Running seed {seed}")
            cmd = [
                sys.executable, "mlp_attack.py",
                "--dataset", target_dataset,
                "--edge_feature", "all",
                "--target_model", "graphsage",
                "--shadow_model", shadow_model,
                "--lr", "0.006",
                "--optim", "adam",
                "--scheduler",
                "--gpu", str(self.gpu),
                "--seed", str(seed),
                "--prop", str(prop)
            ] + self._ATTACK_ARGS_MAP[attack_id]

            try:
                subprocess.run(cmd, check=True)
                time.sleep(1)  # give the child a moment to flush the log file

                with open(log_path, "r") as f:
                    lines = f.readlines()
                # NOTE(review): substring matching on str(seed) can collide
                # (seed 12 also matches 123) — confirm the log's exact
                # comma-separated format before tightening this.
                matched = [
                    line for line in lines
                    if target_dataset in line and str(seed) in line
                    and self._METHOD_MAP[attack_id] in line
                ]
                if matched:
                    fields = matched[-1].strip().split(",")
                    test_auc = float(fields[11])  # column 11 holds the test AUC
                    per_seed.append((seed, test_auc))
                    print(f"AUC = {test_auc:.4f}")
                else:
                    print(f"No matching log for seed {seed}")
            except subprocess.CalledProcessError:
                print(f"Error running mlp_attack.py for seed {seed}")

        if not per_seed:
            print(f"No AUCs recorded for Attack-{attack_id} with shadow {shadow_dataset} model {shadow_model} (prop {prop})")
            return None

        ok_seeds = [s for s, _ in per_seed]
        aucs = [a for _, a in per_seed]
        avg_auc = round(sum(aucs) / len(aucs), 4)
        print(f"Average AUC: {avg_auc:.4f}")
        n = len(aucs)
        # Bug fix: the original built the "seed" column from *all* seeds while
        # every other column was sized by the successful runs only, so a single
        # failed seed made the DataFrame constructor raise a length mismatch.
        df = pd.DataFrame({
            "target_dataset": [target_dataset] * (n + 1),
            "shadow_dataset": [shadow_dataset] * (n + 1),
            "shadow_model": [shadow_model] * (n + 1),
            "attack_id": [attack_id] * (n + 1),
            "prop": [prop] * (n + 1),
            "seed": ok_seeds + ["avg"],
            "test_auc": aucs + [avg_auc]
        })
        df.to_csv(result_path, mode="a", index=False,
                  header=not os.path.exists(result_path))
        return (target_dataset, shadow_dataset, shadow_model, attack_id, prop, avg_auc)