19 changes: 19 additions & 0 deletions examples/run_table3.py
@@ -0,0 +1,19 @@
"""
Example Script: run_table3.py
-----------------------------
Demonstrates how to run the BBoxVe (Backdoor-based Ownership Verification)
experiment from Table 3 using PyGIP.
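
Run from the repository root (assumed layout) so that `implementation`
is importable:
    python examples/run_table3.py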
"""

import torch
from implementation.run_bboxve import run_experiment

def main():
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
res = run_experiment("Cora", "GCN", with_backdoor=True, device=device)
print("\n=== Single-run Result (Table 3 Example) ===")
print(res)

if __name__ == "__main__":
main()
19 changes: 19 additions & 0 deletions examples/run_table4.py
@@ -0,0 +1,19 @@
"""
Example Script: run_table4.py
-----------------------------
Demonstrates how to reproduce one configuration of the BGrOVe
experiment (Table 4).
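
Run from the repository root (assumed layout):
    python examples/run_table4.py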
"""

import torch
from implementation.run_bgrove import run_bgrove_experiment
from pygip.datasets.pyg_datasets import Cora

def main():
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
res = run_bgrove_experiment(Cora, condition="CondA ✓", setting="I", device=device)
print("\n=== Single-run Result (Table 4 Example) ===")
print("FPR, FNR, ACC =", res)

if __name__ == "__main__":
main()
18 changes: 18 additions & 0 deletions examples/run_table5.py
@@ -0,0 +1,18 @@
"""
Example Script: run_table5.py
-----------------------------
Demonstrates how to run the main Table 5 experiment (and Figure 3)
using the unified training pipeline.
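
Run from the repository root (assumed layout):
    python examples/run_table5.py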
"""

import torch
from implementation.run_table5_full import run_table5_full

def main():
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
df = run_table5_full(dataset_name="Cora", setting="I", device=device)
print("\n=== Single-run Result (Table 5 Example) ===")
print(df.head())

if __name__ == "__main__":
main()
15 changes: 15 additions & 0 deletions examples/run_table6_7.py
@@ -0,0 +1,15 @@
"""
Example Script: run_table6_7.py
-------------------------------
Runs the analysis that produces Table 6 (fine-tuning robustness)
and Table 7 (false positives).
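
Assumes results/table5_all_results.csv already exists (produced by the
Table 5 pipeline). Run from the repository root:
    python examples/run_table6_7.py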
"""

from implementation.adversial import generate_tables

def main():
print("Running analysis for Tables 6 & 7 ...")
generate_tables("results/table5_all_results.csv")

if __name__ == "__main__":
main()
14 changes: 14 additions & 0 deletions examples/run_table8.py
@@ -0,0 +1,14 @@
"""
Example Script: run_table8.py
-----------------------------
Demonstrates how to reproduce Table 8 (Double Extraction Robustness).
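
Assumes results/table5_all_results.csv already exists (produced by the
Table 5 pipeline). Run from the repository root:
    python examples/run_table8.py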
"""

from implementation.adversial_table8 import generate_table8

def main():
print("Running Double Extraction analysis (Table 8) ...")
generate_table8("results/table5_all_results.csv")

if __name__ == "__main__":
main()
240 changes: 240 additions & 0 deletions implementation/adversial.py
@@ -0,0 +1,240 @@
# implementation/adversial.py
# Reproduces Table 6 (fine-tuning robustness) and Table 7 (false positives)
# in the format of Zhou et al. (2024)

import os, sys, copy
import numpy as np, pandas as pd
import torch, torch.nn.functional as F
from torch_geometric.data import Data
from sklearn.decomposition import PCA

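# Put the repository root on sys.path so project modules resolve when this
# file is run directly as a script.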
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from implementation.run_table5 import (
load_dataset, set_seed, build_model,
train_model, model_to_vector_probs, get_setting_architectures, COwn
)

# -----------------------------
# Config
# -----------------------------
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MODEL_TRAIN_EPOCHS = 80
COWN_TRAIN_EPOCHS = 40
FINETUNE_EPOCHS = 20
INDEPENDENT_MODEL_EPOCHS = 40
SEEDS = [0, 1, 2]

# -----------------------------
# Fine-tuning (FGSM-like)
# -----------------------------
def finetune_model(model, data, train_mask, epochs=20, lr=0.005, device="cpu"):
model_ft = copy.deepcopy(model).to(device)

data_adv = Data(
x=data.x.clone().detach().to(device),
edge_index=data.edge_index.clone().to(device),
y=data.y.clone().to(device)
)
data_adv.x.requires_grad = True
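# The single backward pass below fills gradients for both the model
# parameters (consumed by opt.step) and the input features (consumed by
# the FGSM-style perturbation).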
opt = torch.optim.Adam(model_ft.parameters(), lr=lr, weight_decay=5e-4)

for epoch in range(epochs):
model_ft.train()
opt.zero_grad()
out = model_ft(data_adv.x, data_adv.edge_index)
loss = F.cross_entropy(out[train_mask], data_adv.y[train_mask])
loss.backward()

with torch.no_grad():
if data_adv.x.grad is not None:
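# FGSM-style step: nudge the features along the gradient sign, with the
# step size ramping linearly up to 0.02 by the final epoch.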
epsilon = 0.02 * (epoch + 1) / epochs
grad_sign = data_adv.x.grad.sign()
data_adv.x.data = data_adv.x.data + epsilon * grad_sign
data_adv.x.grad.zero_()

opt.step()
return model_ft

# -----------------------------
# Ownership verifier training
# -----------------------------
def train_ownership_verifier(data, setting, device="cpu"):
in_dim, out_dim = data.num_features, len(torch.unique(data.y))
Fs, Find, lFs, lFind = get_setting_architectures(setting)

owner_vecs, independent_vecs = [], []

# Owner models
for seed in SEEDS:
set_seed(seed)
mask = torch.randperm(data.num_nodes)[:int(0.6 * data.num_nodes)]
train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
train_mask[mask] = True
for arch in Fs:
m = build_model(arch, in_dim, out_dim, lFs)
m = train_model(m, data, train_mask, epochs=MODEL_TRAIN_EPOCHS, device=device)
owner_vecs.append(model_to_vector_probs(m, data, torch.arange(data.num_nodes)))

# Independent models
for seed in SEEDS:
set_seed(seed + 100)
mask = torch.randperm(data.num_nodes)[:int(0.3 * data.num_nodes)]
ind_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
ind_mask[mask] = True
for arch in Find:
m = build_model(arch, in_dim, out_dim, lFind)
m = train_model(m, data, ind_mask, epochs=INDEPENDENT_MODEL_EPOCHS, device=device)
independent_vecs.append(model_to_vector_probs(m, data, torch.arange(data.num_nodes)))

X_owner_np = np.vstack(owner_vecs)
X_ind_np = np.vstack(independent_vecs)

# Reduce to 128-d
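# The flattened prediction vectors are high-dimensional; PCA compresses them
# to at most 128 components, and zero-padding restores the fixed 128-d input
# expected by COwn.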
X_all = np.vstack([X_owner_np, X_ind_np])
n_samples, n_features = X_all.shape
n_comp = min(128, n_samples, n_features)
if n_comp < n_features:
pca = PCA(n_components=n_comp)
X_all = pca.fit_transform(X_all)
if X_all.shape[1] < 128:
padding = np.zeros((X_all.shape[0], 128 - X_all.shape[1]))
X_all = np.hstack([X_all, padding])

n_owner = len(owner_vecs)
X_owner_np = X_all[:n_owner]
X_ind_np = X_all[n_owner:]

# Train classifier
X_train = torch.tensor(X_all, dtype=torch.float32, device=device)
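# Binary labels: 1 = owner-derived model, 0 = independently trained model.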
y_train = torch.tensor(np.hstack([np.ones(n_owner), np.zeros(len(X_ind_np))]),
dtype=torch.long, device=device)
cown = COwn(input_dim=128).to(device)
opt = torch.optim.Adam(cown.parameters(), lr=0.001)

for epoch in range(COWN_TRAIN_EPOCHS):
cown.train()
opt.zero_grad()
logits = cown(X_train)
loss = F.cross_entropy(logits, y_train)
loss.backward()
opt.step()

return cown, X_owner_np, X_ind_np

# -----------------------------
# Eval metrics (FPR, FNR, ACC)
# -----------------------------
def evaluate_cown(cown, X_owner_np, X_ind_np, device="cpu"):
X_owner = torch.tensor(X_owner_np, dtype=torch.float32, device=device)
X_ind = torch.tensor(X_ind_np, dtype=torch.float32, device=device)

cown.eval()
with torch.no_grad():
preds_owner = cown(X_owner).argmax(dim=1).cpu().numpy()
preds_ind = cown(X_ind).argmax(dim=1).cpu().numpy()

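# Label 1 = owner-derived, 0 = independent.
# FNR: owner models misclassified as independent.
# FPR: independent models wrongly flagged as the owner's.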
fnr = (preds_owner == 0).mean() * 100
fpr = (preds_ind == 1).mean() * 100
acc = ((preds_owner == 1).sum() + (preds_ind == 0).sum()) / (len(preds_owner) + len(preds_ind)) * 100
return fpr, fnr, acc

# -----------------------------
# False positives (Table 7)
# -----------------------------
def run_false_positive_experiment(data_orig, dataset_name, setting, cown, node_order, device="cpu", repeats=5):
in_dim, out_dim = data_orig.num_features, len(torch.unique(data_orig.y))
Fs, Find, lFs, lFind = get_setting_architectures(setting)

fpr_list = []
for rep in range(repeats):
set_seed(rep + 500)
num_nodes = data_orig.num_nodes
independent_train = torch.randperm(num_nodes)[:int(0.3 * num_nodes)]
independent_mask = torch.zeros(num_nodes, dtype=torch.bool)
independent_mask[independent_train] = True

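# Train fresh models on an independent 30% split and score them with the
# frozen verifier; any vector classified as "owner" here is a false positive.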
independent_vecs = []
for arch in Find:
m = build_model(arch, in_dim, out_dim, lFind)
m = train_model(m, data_orig, independent_mask, epochs=INDEPENDENT_MODEL_EPOCHS, device=device)
independent_vecs.append(model_to_vector_probs(m, data_orig, node_order))

X_independent_np = np.vstack(independent_vecs)
n_samples, n_features = X_independent_np.shape
n_comp = min(128, n_samples, n_features)
if n_comp < n_features:
pca = PCA(n_components=n_comp)
X_independent_np = pca.fit_transform(X_independent_np)
if X_independent_np.shape[1] < 128:
padding = np.zeros((X_independent_np.shape[0], 128 - X_independent_np.shape[1]))
X_independent_np = np.hstack([X_independent_np, padding])

X_independent = torch.tensor(X_independent_np, dtype=torch.float32, device=device)
cown.eval()
with torch.no_grad():
preds = cown(X_independent).argmax(dim=1).cpu().numpy()

fpr = (preds == 1).mean() * 100
fpr_list.append(fpr)

return np.mean(fpr_list), np.std(fpr_list)

# -----------------------------
# Generate Table 6 and Table 7
# -----------------------------
def generate_tables(all_results_csv="results/table5_all_results.csv"):
df = pd.read_csv(all_results_csv)
if "cown_acc_mean" not in df.columns:
raise KeyError(f"Expected a 'cown_acc_mean' column in {all_results_csv}")

os.makedirs("results", exist_ok=True)
table6, table7 = [], []

for (ds, st, md), sub in df.groupby(["dataset", "setting", "mode"]):
print(f"\n=== {ds} / Setting {st} / Mode {md} ===")

data, _ = load_dataset(ds, device=DEVICE)
num_nodes = data.num_nodes
train_nodes = torch.randperm(num_nodes)[:int(0.6 * num_nodes)]
train_mask = torch.zeros(num_nodes, dtype=torch.bool)
train_mask[train_nodes] = True

# Train + fine-tune
Fs, Find, lFs, lFind = get_setting_architectures(st)
target_arch = Fs[0] if len(Fs) > 0 else "GCN"
m = build_model(target_arch, data.num_features, len(torch.unique(data.y)), lFs)
m = train_model(m, data, train_mask, epochs=MODEL_TRAIN_EPOCHS, device=DEVICE)
m_finetuned = finetune_model(m, data, train_mask, epochs=FINETUNE_EPOCHS, device=DEVICE)

# Evaluate the original (pre-fine-tuning) model in inference mode.
m.eval()
with torch.no_grad():
    ori_acc = (m(data.x.to(DEVICE), data.edge_index.to(DEVICE)).argmax(dim=1) == data.y.to(DEVICE)).float().mean().item() * 100

# Train C_own
trained_cown, X_owner_np, X_ind_np = train_ownership_verifier(data, st, device=DEVICE)
fpr, fnr, acc_cown = evaluate_cown(trained_cown, X_owner_np, X_ind_np, device=DEVICE)

# Table 6
table6.append({
"Dataset": ds, "Setting": st, "Mode": md,
"Ori_ACC(%)": round(ori_acc, 2),
"FPR(%)": round(fpr, 2),
"FNR(%)": round(fnr, 2),
"Fine_ACC(%)": round(acc_cown, 2)
})

# Table 7
node_order = torch.arange(data.num_nodes)
fpr_mean, fpr_std = run_false_positive_experiment(data, ds, st, trained_cown, node_order, device=DEVICE)
table7.append({
"Dataset": ds, "Setting": st, "Mode": md,
"FPR": f"{fpr_mean:.2f} ± {fpr_std:.2f}"
})

pd.DataFrame(table6).to_csv("results/table6.csv", index=False)
pd.DataFrame(table7).to_csv("results/table7.csv", index=False)
print("\n✅ Saved results/table6.csv and table7.csv")


# -----------------------------
if __name__ == "__main__":
generate_tables()