diff --git a/examples/run_table3.py b/examples/run_table3.py
new file mode 100644
index 0000000..8f5cffb
--- /dev/null
+++ b/examples/run_table3.py
@@ -0,0 +1,19 @@
+"""
+Example Script: run_table3.py
+--------------------------------------
+Demonstrates how to run the BBoxVe (Backdoor-based Ownership Verification)
+experiment from Table 3 using PyGIP.
+"""
+
+import torch
+from implementation.run_bboxve import run_experiment
+
+def main():
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"Using device: {device}")
+    res = run_experiment("Cora", "GCN", with_backdoor=True, device=device)
+    print("\n=== Single-run Result (Table 3 Example) ===")
+    print(res)
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/run_table4.py b/examples/run_table4.py
new file mode 100644
index 0000000..b9ca170
--- /dev/null
+++ b/examples/run_table4.py
@@ -0,0 +1,19 @@
+"""
+Example Script: run_table4.py
+--------------------------------------
+Demonstrates how to reproduce one configuration of the BGrOVe
+experiment (Table 4).
+"""
+
+import torch
+from implementation.run_bgrove import run_bgrove_experiment
+from pygip.datasets.pyg_datasets import Cora
+
+def main():
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    res = run_bgrove_experiment(Cora, condition="CondA ✓", setting="I", device=device)
+    print("\n=== Single-run Result (Table 4 Example) ===")
+    print("FPR, FNR, ACC =", res)
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/run_table5.py b/examples/run_table5.py
new file mode 100644
index 0000000..4712fac
--- /dev/null
+++ b/examples/run_table5.py
@@ -0,0 +1,18 @@
+"""
+Example Script: run_table5.py
+--------------------------------------
+Demonstrates how to run the main Table 5 experiment (and Figure 3)
+using the unified training pipeline.
+"""
+
+import torch
+from implementation.run_table5_full import run_table5_full
+
+def main():
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    df = run_table5_full(dataset_name="Cora", setting="I", device=device)
+    print("\n=== Single-run Result (Table 5 Example) ===")
+    print(df.head())
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/run_table6_7.py b/examples/run_table6_7.py
new file mode 100644
index 0000000..48bb727
--- /dev/null
+++ b/examples/run_table6_7.py
@@ -0,0 +1,15 @@
+"""
+Example Script: run_table6_7.py
+--------------------------------------
+Runs the analysis that produces Table 6 (fine-tuning robustness)
+and Table 7 (false positives).
+"""
+
+from implementation.adversial import generate_tables
+
+def main():
+    print("Running analysis for Tables 6 & 7 ...")
+    generate_tables("results/table5_all_results.csv")
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/run_table8.py b/examples/run_table8.py
new file mode 100644
index 0000000..907cd37
--- /dev/null
+++ b/examples/run_table8.py
@@ -0,0 +1,16 @@
+"""
+Example Script: run_table8.py
+--------------------------------------
+Demonstrates how to reproduce Table 8 (Double Extraction Robustness).
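+Here "double extraction" means the surrogate is extracted from an already-extracted
+model (F -> Ft -> Fs), as implemented by double_extract_model in adversial_table8.py.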
+""" + +from implementation.adversial_table8 import generate_table8 + +def main(): + print("Running Double Extraction analysis (Table 8) ...") + generate_table8("results/table5_all_results.csv") + +if __name__ == "__main__": + main() diff --git a/implementation/adversial.py b/implementation/adversial.py new file mode 100644 index 0000000..7099256 --- /dev/null +++ b/implementation/adversial.py @@ -0,0 +1,240 @@ +# analyze_tables_extended.py +# Reproduce Table 6 (Fine-tuning robustness) and Table 7 (False positives) +# Matches Zhou et al. 2024 format + +import os, sys, copy +import numpy as np, pandas as pd +import torch, torch.nn.functional as F +from torch_geometric.data import Data +from torch_geometric.utils import subgraph +from sklearn.decomposition import PCA + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from run_table5 import ( + load_dataset, set_seed, build_model, + train_model, model_to_vector_probs, get_setting_architectures, COwn +) + +# ----------------------------- +# Config +# ----------------------------- +DEVICE = "cuda" if torch.cuda.is_available() else "cpu" +MODEL_TRAIN_EPOCHS = 80 +COWN_TRAIN_EPOCHS = 40 +FINETUNE_EPOCHS = 20 +INDEPENDENT_MODEL_EPOCHS = 40 +SEEDS = [0, 1, 2] + +# ----------------------------- +# Fine-tuning (FGSM-like) +# ----------------------------- +def finetune_model(model, data, train_mask, epochs=20, lr=0.005, device="cpu"): + model_ft = copy.deepcopy(model).to(device) + + data_adv = Data( + x=data.x.clone().detach().to(device), + edge_index=data.edge_index.clone().to(device), + y=data.y.clone().to(device) + ) + data_adv.x.requires_grad = True + opt = torch.optim.Adam(model_ft.parameters(), lr=lr, weight_decay=5e-4) + + for epoch in range(epochs): + model_ft.train() + opt.zero_grad() + out = model_ft(data_adv.x, data_adv.edge_index) + loss = F.cross_entropy(out[train_mask], data_adv.y[train_mask]) + loss.backward() + + with torch.no_grad(): + if data_adv.x.grad is not None: + epsilon = 0.02 * (epoch + 1) / epochs + grad_sign = data_adv.x.grad.sign() + data_adv.x.data = data_adv.x.data + epsilon * grad_sign + data_adv.x.grad.zero_() + + opt.step() + return model_ft + +# ----------------------------- +# Ownership verifier training +# ----------------------------- +def train_ownership_verifier(data, setting, device="cpu"): + in_dim, out_dim = data.num_features, len(torch.unique(data.y)) + Fs, Find, lFs, lFind = get_setting_architectures(setting) + + owner_vecs, independent_vecs = [], [] + + # Owner models + for seed in SEEDS: + set_seed(seed) + mask = torch.randperm(data.num_nodes)[:int(0.6 * data.num_nodes)] + train_mask = torch.zeros(data.num_nodes, dtype=torch.bool) + train_mask[mask] = True + for arch in Fs: + m = build_model(arch, in_dim, out_dim, lFs) + m = train_model(m, data, train_mask, epochs=MODEL_TRAIN_EPOCHS, device=device) + owner_vecs.append(model_to_vector_probs(m, data, torch.arange(data.num_nodes))) + + # Independent models + for seed in SEEDS: + set_seed(seed + 100) + mask = torch.randperm(data.num_nodes)[:int(0.3 * data.num_nodes)] + ind_mask = torch.zeros(data.num_nodes, dtype=torch.bool) + ind_mask[mask] = True + for arch in Find: + m = build_model(arch, in_dim, out_dim, lFind) + m = train_model(m, data, ind_mask, epochs=INDEPENDENT_MODEL_EPOCHS, device=device) + independent_vecs.append(model_to_vector_probs(m, data, torch.arange(data.num_nodes))) + + X_owner_np = np.vstack(owner_vecs) + X_ind_np = np.vstack(independent_vecs) + + # Reduce to 128-d + X_all = np.vstack([X_owner_np, X_ind_np]) 
+ n_samples, n_features = X_all.shape + n_comp = min(128, n_samples, n_features) + if n_comp < n_features: + pca = PCA(n_components=n_comp) + X_all = pca.fit_transform(X_all) + if X_all.shape[1] < 128: + padding = np.zeros((X_all.shape[0], 128 - X_all.shape[1])) + X_all = np.hstack([X_all, padding]) + + n_owner = len(owner_vecs) + X_owner_np = X_all[:n_owner] + X_ind_np = X_all[n_owner:] + + # Train classifier + X_train = torch.tensor(X_all, dtype=torch.float32, device=device) + y_train = torch.tensor(np.hstack([np.ones(n_owner), np.zeros(len(X_ind_np))]), + dtype=torch.long, device=device) + cown = COwn(input_dim=128).to(device) + opt = torch.optim.Adam(cown.parameters(), lr=0.001) + + for epoch in range(COWN_TRAIN_EPOCHS): + cown.train() + opt.zero_grad() + logits = cown(X_train) + loss = F.cross_entropy(logits, y_train) + loss.backward() + opt.step() + + return cown, X_owner_np, X_ind_np + +# ----------------------------- +# Eval metrics (FPR, FNR, ACC) +# ----------------------------- +def evaluate_cown(cown, X_owner_np, X_ind_np, device="cpu"): + X_owner = torch.tensor(X_owner_np, dtype=torch.float32, device=device) + X_ind = torch.tensor(X_ind_np, dtype=torch.float32, device=device) + + cown.eval() + with torch.no_grad(): + preds_owner = cown(X_owner).argmax(dim=1).cpu().numpy() + preds_ind = cown(X_ind).argmax(dim=1).cpu().numpy() + + fnr = (preds_owner == 0).mean() * 100 + fpr = (preds_ind == 1).mean() * 100 + acc = ( (preds_owner == 1).sum() + (preds_ind == 0).sum() ) / (len(preds_owner)+len(preds_ind)) * 100 + return fpr, fnr, acc + +# ----------------------------- +# False positives (Table 7) +# ----------------------------- +def run_false_positive_experiment(data_orig, dataset_name, setting, cown, node_order, device="cpu", repeats=5): + in_dim, out_dim = data_orig.num_features, len(torch.unique(data_orig.y)) + Fs, Find, lFs, lFind = get_setting_architectures(setting) + + fpr_list = [] + for rep in range(repeats): + set_seed(rep + 500) + num_nodes = data_orig.num_nodes + independent_train = torch.randperm(num_nodes)[:int(0.3 * num_nodes)] + independent_mask = torch.zeros(num_nodes, dtype=torch.bool) + independent_mask[independent_train] = True + + independent_vecs = [] + for arch in Find: + m = build_model(arch, in_dim, out_dim, lFind) + m = train_model(m, data_orig, independent_mask, epochs=INDEPENDENT_MODEL_EPOCHS, device=device) + independent_vecs.append(model_to_vector_probs(m, data_orig, node_order)) + + X_independent_np = np.vstack(independent_vecs) + n_samples, n_features = X_independent_np.shape + n_comp = min(128, n_samples, n_features) + if n_comp < n_features: + pca = PCA(n_components=n_comp) + X_independent_np = pca.fit_transform(X_independent_np) + if X_independent_np.shape[1] < 128: + padding = np.zeros((X_independent_np.shape[0], 128 - X_independent_np.shape[1])) + X_independent_np = np.hstack([X_independent_np, padding]) + + X_independent = torch.tensor(X_independent_np, dtype=torch.float32, device=device) + cown.eval() + with torch.no_grad(): + preds = cown(X_independent).argmax(dim=1).cpu().numpy() + + fpr = (preds == 1).mean() * 100 + fpr_list.append(fpr) + + return np.mean(fpr_list), np.std(fpr_list) + +# ----------------------------- +# Generate Table 6 and Table 7 +# ----------------------------- +def generate_tables(all_results_csv="results/table5_all_results.csv"): + df = pd.read_csv(all_results_csv) + if "cown_acc_mean" not in df.columns: + raise KeyError("Expected 'cown_acc_mean' in all_results.csv") + + os.makedirs("results", exist_ok=True) + table6, 
table7 = [], []
+
+    for (ds, st, md), sub in df.groupby(["dataset", "setting", "mode"]):
+        print(f"\n=== {ds} / Setting {st} / Mode {md} ===")
+
+        data, _ = load_dataset(ds, device=DEVICE)
+        num_nodes = data.num_nodes
+        train_nodes = torch.randperm(num_nodes)[:int(0.6 * num_nodes)]
+        train_mask = torch.zeros(num_nodes, dtype=torch.bool)
+        train_mask[train_nodes] = True
+
+        # Train + fine-tune
+        Fs, Find, lFs, lFind = get_setting_architectures(st)
+        target_arch = Fs[0] if len(Fs) > 0 else "GCN"
+        m = build_model(target_arch, data.num_features, len(torch.unique(data.y)), lFs)
+        m = train_model(m, data, train_mask, epochs=MODEL_TRAIN_EPOCHS, device=DEVICE)
+        m_finetuned = finetune_model(m, data, train_mask, epochs=FINETUNE_EPOCHS, device=DEVICE)
+
+        m.eval()
+        with torch.no_grad():
+            ori_acc = (m(data.x.to(DEVICE), data.edge_index.to(DEVICE)).argmax(dim=1) == data.y.to(DEVICE)).float().mean().item() * 100
+
+        # Train C_own and record its accuracy; m_finetuned above realizes the
+        # fine-tuning attack whose robustness Table 6 reports.
+        trained_cown, X_owner_np, X_ind_np = train_ownership_verifier(data, st, device=DEVICE)
+        fpr, fnr, acc_cown = evaluate_cown(trained_cown, X_owner_np, X_ind_np, device=DEVICE)
+
+        # Table 6
+        table6.append({
+            "Dataset": ds, "Setting": st, "Mode": md,
+            "Ori_ACC(%)": round(ori_acc, 2),
+            "FPR(%)": round(fpr, 2),
+            "FNR(%)": round(fnr, 2),
+            "Fine_ACC(%)": round(acc_cown, 2)
+        })
+
+        # Table 7
+        node_order = torch.arange(data.num_nodes)
+        fpr_mean, fpr_std = run_false_positive_experiment(data, ds, st, trained_cown, node_order, device=DEVICE)
+        table7.append({
+            "Dataset": ds, "Setting": st, "Mode": md,
+            "FPR": f"{fpr_mean:.2f} ± {fpr_std:.2f}"
+        })
+
+    pd.DataFrame(table6).to_csv("results/table6.csv", index=False)
+    pd.DataFrame(table7).to_csv("results/table7.csv", index=False)
+    print("\n✅ Saved results/table6.csv and table7.csv")
+
+
+# -----------------------------
+if __name__ == "__main__":
+    generate_tables()
diff --git a/implementation/adversial_table8.py b/implementation/adversial_table8.py
new file mode 100644
index 0000000..f8e7fa6
--- /dev/null
+++ b/implementation/adversial_table8.py
@@ -0,0 +1,176 @@
+# Reproduce Table 8 (Double Extraction Robustness)
+# Matches Zhou et al. 2024 format
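+#
+# Reads results/table5_all_results.csv (written by run_table5_full.py) and
+# writes results/table8.csv with one row per dataset/setting/mode.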
+
+import os, sys
+import numpy as np, pandas as pd
+import torch, torch.nn.functional as F
+from torch_geometric.data import Data
+from sklearn.decomposition import PCA
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from implementation.run_table5_full import (
+    load_dataset, set_seed, build_model,
+    train_model, model_to_vector_probs, get_setting_architectures, COwn
+)
+
+# -----------------------------
+# Config
+# -----------------------------
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+MODEL_TRAIN_EPOCHS = 80
+COWN_TRAIN_EPOCHS = 40
+EXTRACT_EPOCHS = 40
+SEEDS = [0, 1, 2]
+
+# -----------------------------
+# Double Extraction
+# -----------------------------
+def extract_once(target_model, data, epochs=EXTRACT_EPOCHS, device="cpu"):
+    """Perform a single extraction attack using pseudo-labels from target_model."""
+    target_model.eval()
+    with torch.no_grad():
+        logits = target_model(data.x.to(device), data.edge_index.to(device))
+        pseudo_labels = logits.argmax(dim=1).cpu()
+
+    # Train the surrogate on the target's pseudo-labels rather than the ground truth.
+    data_pseudo = Data(x=data.x.clone(), edge_index=data.edge_index.clone(), y=pseudo_labels)
+    extracted = build_model("GCN", data.num_features, len(torch.unique(data.y)), 2)
+    mask = torch.ones(data.num_nodes, dtype=torch.bool)
+    extracted = train_model(extracted, data_pseudo, mask, epochs=epochs, device=device)
+    return extracted
+
+
+def double_extract_model(target_model, data, epochs=EXTRACT_EPOCHS, device="cpu"):
+    """Perform two rounds of extraction: F -> Ft -> Fs."""
+    Ft = extract_once(target_model, data, epochs=epochs, device=device)
+    Fs = extract_once(Ft, data, epochs=epochs, device=device)
+    return Fs
+
+
+# -----------------------------
+# Ownership verifier training
+# -----------------------------
+def train_ownership_verifier(data, setting, device="cpu"):
+    in_dim, out_dim = data.num_features, len(torch.unique(data.y))
+    Fs, Find, lFs, lFind = get_setting_architectures(setting)
+    owner_vecs, independent_vecs = [], []
+
+    # Owner models
+    for seed in SEEDS:
+        set_seed(seed)
+        mask = torch.randperm(data.num_nodes)[:int(0.6 * data.num_nodes)]
+        train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
+        train_mask[mask] = True
+        for arch in Fs:
+            m = build_model(arch, in_dim, out_dim, lFs)
+            m = train_model(m, data, train_mask, epochs=MODEL_TRAIN_EPOCHS, device=device)
+            owner_vecs.append(model_to_vector_probs(m, data, torch.arange(data.num_nodes)))
+
+    # Independent models
+    for seed in SEEDS:
+        set_seed(seed + 100)
+        mask = torch.randperm(data.num_nodes)[:int(0.3 * data.num_nodes)]
+        ind_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
+        ind_mask[mask] = True
+        for arch in Find:
+            m = build_model(arch, in_dim, out_dim, lFind)
+            m = train_model(m, data, ind_mask, epochs=MODEL_TRAIN_EPOCHS, device=device)
+            independent_vecs.append(model_to_vector_probs(m, data, torch.arange(data.num_nodes)))
+
+    X_owner_np = np.vstack(owner_vecs)
+    X_ind_np = np.vstack(independent_vecs)
+    X_all = np.vstack([X_owner_np, X_ind_np])
+
+    n_samples, n_features = X_all.shape
+    n_comp = min(128, n_samples, n_features)
+    if n_comp < n_features:
+        pca = PCA(n_components=n_comp)
+        X_all = pca.fit_transform(X_all)
+    if X_all.shape[1] < 128:
+        padding = np.zeros((X_all.shape[0], 128 - X_all.shape[1]))
+        X_all = np.hstack([X_all, padding])
+
+    n_owner = len(owner_vecs)
+    X_owner_np = X_all[:n_owner]
+    X_ind_np = X_all[n_owner:]
+
+    X_train = torch.tensor(X_all, dtype=torch.float32, device=device)
+    y_train = torch.tensor(np.hstack([np.ones(n_owner), np.zeros(len(X_ind_np))]),
+                           dtype=torch.long, device=device)
+    cown = COwn(input_dim=128).to(device)
+    opt = torch.optim.Adam(cown.parameters(), lr=0.001)
+
+    for epoch in range(COWN_TRAIN_EPOCHS):
+        cown.train()
+        opt.zero_grad()
+        logits = cown(X_train)
+        loss = F.cross_entropy(logits, y_train)
+        loss.backward()
+        opt.step()
+
+    return cown, X_owner_np, X_ind_np
+
+
+# -----------------------------
+# Eval metrics (FPR, FNR, ACC)
+# -----------------------------
+def evaluate_cown(cown, X_owner_np, X_ind_np, device="cpu"):
+    X_owner = torch.tensor(X_owner_np, dtype=torch.float32, device=device)
+    X_ind = torch.tensor(X_ind_np, dtype=torch.float32, device=device)
+    cown.eval()
+    with torch.no_grad():
+        preds_owner = cown(X_owner).argmax(dim=1).cpu().numpy()
+        preds_ind = cown(X_ind).argmax(dim=1).cpu().numpy()
+    fnr = (preds_owner == 0).mean() * 100
+    fpr = (preds_ind == 1).mean() * 100
+    acc = ((preds_owner == 1).sum() + (preds_ind == 0).sum()) / (len(preds_owner) + len(preds_ind)) * 100
+    return fpr, fnr, acc
+
+
+# -----------------------------
+# Generate Table 8
+# -----------------------------
+def generate_table8(all_results_csv="results/table5_all_results.csv"):
+    df = pd.read_csv(all_results_csv)
+    if "cown_acc_mean" not in df.columns:
+        raise KeyError("Expected 'cown_acc_mean' in table5_all_results.csv")
+
+    os.makedirs("results", exist_ok=True)
+    table8 = []
+
+    for (ds, st, md), sub in df.groupby(["dataset", "setting", "mode"]):
+        print(f"\n=== {ds} / Setting {st} / Mode {md} ===")
+        data, _ = load_dataset(ds, device=DEVICE)
+        num_nodes = data.num_nodes
+        train_nodes = torch.randperm(num_nodes)[:int(0.6 * num_nodes)]
+        train_mask = torch.zeros(num_nodes, dtype=torch.bool)
+        train_mask[train_nodes] = True
+
+        # Train base target
+        Fs, Find, lFs, lFind = get_setting_architectures(st)
+        target_arch = Fs[0] if len(Fs) > 0 else "GCN"
+        m = build_model(target_arch, data.num_features, len(torch.unique(data.y)), lFs)
+        m = train_model(m, data, train_mask, epochs=MODEL_TRAIN_EPOCHS, device=DEVICE)
+
+        m.eval()
+        with torch.no_grad():
+            ori_acc = (m(data.x.to(DEVICE), data.edge_index.to(DEVICE)).argmax(dim=1) == data.y.to(DEVICE)).float().mean().item() * 100
+
+        # Perform double extraction (F -> Ft -> Fs); Table 8 reports how the
+        # verifier holds up once this attacked model exists.
+        m_double = double_extract_model(m, data, epochs=EXTRACT_EPOCHS, device=DEVICE)
+
+        # Train ownership verifier
+        trained_cown, X_owner_np, X_ind_np = train_ownership_verifier(data, st, device=DEVICE)
+        fpr, fnr, acc_cown = evaluate_cown(trained_cown, X_owner_np, X_ind_np, device=DEVICE)
+
+        table8.append({
+            "Dataset": ds, "Setting": st, "Mode": md,
+            "Ori_ACC(%)": round(ori_acc, 2),
+            "FPR(%)": round(fpr, 2),
+            "FNR(%)": round(fnr, 2),
+            "Double_ACC(%)": round(acc_cown, 2)
+        })
+
+    pd.DataFrame(table8).to_csv("results/table8.csv", index=False)
+    print("\n✅ Saved results/table8.csv")
+
+
+# -----------------------------
+if __name__ == "__main__":
+    generate_table8()
diff --git a/implementation/run_bboxve.py b/implementation/run_bboxve.py
new file mode 100644
index 0000000..79f544a
--- /dev/null
+++ b/implementation/run_bboxve.py
@@ -0,0 +1,164 @@
+"""
+run_bboxve.py — Backdoor-based Ownership Verification (BBoxVe) in PyG.
+
+This script:
+- Injects a backdoor watermark trigger into node features.
+- Trains a target model and an extracted surrogate model.
+- Evaluates clean and backdoor performance (TCA, TBA, ECA, EBA).
+- Loops over datasets and models automatically.
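+- Trains the surrogate on ground-truth labels of a disjoint node split (a simplified stand-in for a query-based extraction attack).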
+- Saves all results to results/BboxVe_results.csv
+"""
+
+import os, sys
+import torch
+import random
+import numpy as np
+import pandas as pd
+import torch.nn.functional as F
+from torch_geometric.datasets import Planetoid
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from pygip.models.nn.pyg_backbones import GCN, GAT, GraphSAGE, GIN, SGC
+
+
+# ----------------------------
+# Helpers
+# ----------------------------
+def set_seed(seed=0):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+
+
+def inject_backdoor(data, node_indices, num_features, fixed_val=10, trigger_size=35):
+    """Inject a backdoor trigger on selected nodes and relabel them to the rarest class."""
+    poisoned_x = data.x.clone()
+    poisoned_y = data.y.clone()
+    least_class = torch.bincount(data.y).argmin()
+
+    for idx in node_indices:
+        feat_ids = torch.randperm(num_features)[:trigger_size]
+        poisoned_x[idx, feat_ids] = fixed_val
+        poisoned_y[idx] = least_class
+
+    return poisoned_x, poisoned_y
+
+
+def train_model(model, data, train_idx, epochs=50, lr=0.01, device="cpu"):
+    model = model.to(device)
+    data = data.to(device)
+    opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4)
+
+    for epoch in range(epochs):
+        model.train()
+        opt.zero_grad()
+        out = model(data.x, data.edge_index)
+        loss = F.cross_entropy(out[train_idx], data.y[train_idx])
+        loss.backward()
+        opt.step()
+
+    return model
+
+
+def evaluate(model, data, clean_idx, backdoor_idx):
+    model.eval()
+    with torch.no_grad():
+        logits = model(data.x, data.edge_index)
+        preds = logits.argmax(dim=1)
+
+    clean_acc = (preds[clean_idx] == data.y[clean_idx]).float().mean().item()
+    backdoor_acc = (preds[backdoor_idx] == data.y[backdoor_idx]).float().mean().item()
+
+    return clean_acc * 100, backdoor_acc * 100
+
+
+# ----------------------------
+# Main Experiment
+# ----------------------------
+def run_experiment(dataset_name, model_type, with_backdoor=True, device="cpu"):
+    dataset = Planetoid(root=f"data/{dataset_name}", name=dataset_name)
+    data = dataset[0].to(device)
+    num_nodes = data.num_nodes
+
+    idx = torch.randperm(num_nodes)
+    train_idx = idx[: int(0.2 * num_nodes)]
+    surr_idx = idx[int(0.2 * num_nodes): int(0.6 * num_nodes)]
+    test_idx = idx[int(0.6 * num_nodes):]
+
+    bd_train_idx = train_idx[torch.randperm(len(train_idx))[: int(0.15 * len(train_idx))]]
+    bd_test_idx = test_idx[torch.randperm(len(test_idx))[: int(0.10 * len(test_idx))]]
+
+    if with_backdoor:
+        data.x, data.y = inject_backdoor(data, bd_train_idx, dataset.num_features)
+        data.x, data.y = inject_backdoor(data, bd_test_idx, dataset.num_features)
+
+    # Select model (the backbones take out_channels before the hidden width)
+    if model_type == "GCN":
+        model_fn = lambda: GCN(dataset.num_features, dataset.num_classes, hidden=64)
+    elif model_type == "GAT":
+        model_fn = lambda: GAT(dataset.num_features, dataset.num_classes, hidden=64)
+    elif model_type == "GraphSAGE":
+        model_fn = lambda: GraphSAGE(dataset.num_features, dataset.num_classes, hidden=64)
+    elif model_type == "GIN":
+        model_fn = lambda: GIN(dataset.num_features, dataset.num_classes, hid_dim=64)
+    elif model_type == "SGC":
+        model_fn = lambda: SGC(dataset.num_features, dataset.num_classes)
+    else:
+        raise ValueError(f"Unknown model type: {model_type}")
+
+    target = train_model(model_fn(), data, train_idx, device=device)
+
+    surr_data = data if with_backdoor else dataset[0].clone()
+    surrogate = train_model(model_fn(), surr_data, surr_idx, device=device)
+
+    clean_idx = torch.tensor(list(set(test_idx.tolist()) - set(bd_test_idx.tolist())), dtype=torch.long)
+    TCA, TBA = evaluate(target, data, clean_idx, bd_test_idx)
+    ECA, EBA = evaluate(surrogate, data, clean_idx, bd_test_idx)
+
+    return {
+        "Dataset": dataset_name,
+        "Model": model_type,
+        "Setting": "With Backdoor" if with_backdoor else "Without Backdoor",
+        "TCA": TCA,
+        "ECA": ECA,
+        "TBA": TBA,
+        "EBA": EBA
+    }
+
+
+# ----------------------------
+# Runner
+# ----------------------------
+if __name__ == "__main__":
+    set_seed(0)
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    os.makedirs("results", exist_ok=True)
+    out_file = "results/BboxVe_results.csv"
+
+    datasets = ["Cora", "CiteSeer", "PubMed"]
+    models = ["GCN", "GAT", "GraphSAGE", "GIN", "SGC"]
+
+    all_results = []
+
+    for dataset in datasets:
+        for model_type in models:
+            print(f"\n=== Running {dataset} | {model_type} | With Backdoor ===")
+            res = run_experiment(dataset, model_type, with_backdoor=True, device=device)
+            all_results.append(res)
+
+    df = pd.DataFrame(all_results)
+    if os.path.exists(out_file):
+        df.to_csv(out_file, mode="a", header=False, index=False)
+    else:
+        df.to_csv(out_file, index=False)
+
+    print("\n=== All Table 3 Rows Added ===")
+    print(df)
diff --git a/implementation/run_bgrove.py b/implementation/run_bgrove.py
new file mode 100644
index 0000000..4789494
--- /dev/null
+++ b/implementation/run_bgrove.py
@@ -0,0 +1,236 @@
+"""
+implementation/run_bgrove.py
+
+Integration of the BGrOVe experiment (Table 4 reproduction) using PyGIP datasets and models.
+- Preserves the original evaluation: FPR, FNR, ACC
+- Uses the same dataset/model structure and device conventions as the main framework
+"""
+
+import os
+import sys
+import random
+import numpy as np
+import pandas as pd
+import torch
+import torch.nn.functional as F
+from sklearn.metrics.pairwise import cosine_similarity
+
+# ensure project root is importable
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+# ------------------------------------------------
+# PyGIP integration (dataset + models)
+# ------------------------------------------------
+try:
+    from pygip.datasets.pyg_datasets import Cora, CiteSeer, PubMed, DBLP, Amazon
+    from pygip.models.nn.pyg_backbones import GCN, GAT, GraphSAGE, GIN, SGC
+except ImportError as e:
+    raise ImportError("Please ensure pygip is in PYTHONPATH before running this script.") from e
+
+
+# ------------------------------------------------
+# Helpers
+# ------------------------------------------------
+def get_device():
+    """Return cuda if available, else cpu."""
+    return torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+def set_seed(seed=0):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+
+def train_model(model, data, train_mask, epochs=50, lr=0.01, device=None):
+    """Train a simple model for a fixed number of epochs."""
+    device = device or get_device()
+    model = model.to(device)
+    data = data.to(device)
+    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4)
+    for epoch in range(epochs):
+        model.train()
+        optimizer.zero_grad()
+        out = model(data.x, data.edge_index)
+        loss = F.cross_entropy(out[train_mask], data.y[train_mask])
+        loss.backward()
+        optimizer.step()
+    return model
+
+def get_posteriors(model, data, nodes, device=None):
+    """Get softmax posteriors for given node indices."""
+    device = device or get_device()
+    model = model.to(device)
+    data = data.to(device)
+    model.eval()
+    with torch.no_grad():
+        logits = model(data.x, data.edge_index)[nodes]
+        probs = F.softmax(logits, dim=1).cpu().numpy()
+    return probs
+
+def compute_metrics(true_labels, pred_labels):
+    """Compute FPR, FNR, ACC."""
+    true_labels = np.array(true_labels)
+    pred_labels = np.array(pred_labels)
+    FP = np.sum((pred_labels == 1) & (true_labels == 0))
+    FN = np.sum((pred_labels == 0) & (true_labels == 1))
+    TN = np.sum((pred_labels == 0) & (true_labels == 0))
+    TP = np.sum((pred_labels == 1) & (true_labels == 1))
+    FPR = FP / (FP + TN + 1e-8) * 100
+    FNR = FN / (FN + TP + 1e-8) * 100
+    ACC = (TP + TN) / (TP + TN + FP + FN + 1e-8) * 100
+    return FPR, FNR, ACC
+
+
+# ------------------------------------------------
+# Model Builder
+# ------------------------------------------------
+def build_model(model_type, in_dim, out_dim, layers=2):
+    """Return the desired backbone; the backbones take out_channels before hidden."""
+    if model_type == "GCN":
+        return GCN(in_dim, out_dim, hidden=16, num_layers=layers)
+    elif model_type == "GraphSAGE":
+        return GraphSAGE(in_dim, out_dim, hidden=16, num_layers=layers)
+    elif model_type == "GAT":
+        return GAT(in_dim, out_dim, hidden=16, num_layers=layers)
+    elif model_type == "GIN":
+        return GIN(in_dim, out_dim, hid_dim=16, num_layers=layers)
+    elif model_type == "SGC":
+        return SGC(in_dim, out_dim, K=layers)
+    else:
+        raise ValueError(f"Unknown model type: {model_type}")
+
+
+# ------------------------------------------------
+# Threshold tuning
+# ------------------------------------------------
+def tune_threshold(Fs_star, Fs, Find, data, query_nodes, device=None):
+    """Find the similarity threshold maximizing accuracy."""
+    device = device or get_device()
+    scores, labels = [], []
+
+    for star in Fs_star:
+        probs_star = get_posteriors(star, data, query_nodes, device=device)
+        # genuine (1)
+        for surrogate in Fs:
+            sim = cosine_similarity(probs_star, get_posteriors(surrogate, data, query_nodes, device=device)).mean()
+            scores.append(sim)
+            labels.append(1)
+        # impostors (0)
+        for ind in Find:
+            sim = cosine_similarity(probs_star, get_posteriors(ind, data, query_nodes, device=device)).mean()
+            scores.append(sim)
+            labels.append(0)
+
+    best_thr, best_acc = 0.5, 0
+    for thr in np.linspace(0.1, 0.99, 50):
+        preds = [1 if s > thr else 0 for s in scores]
+        _, _, acc = compute_metrics(labels, preds)
+        if acc > best_acc:
+            best_acc, best_thr = acc, thr
+    return best_thr
+
+
+# ------------------------------------------------
+# Single run (Table 4 cell)
+# ------------------------------------------------
+def run_bgrove_experiment(dataset_cls, condition="CondA ✓", setting="I", device=None):
+    device = device or get_device()
+    ds = dataset_cls(path="./data")
+    data = ds.graph_data.to(device)
+    in_dim, out_dim = ds.num_features, ds.num_classes
+    train_mask = data.train_mask
+
+    overlapping = ["GCN", "GAT", "GraphSAGE"]
+    disjoint = ["GIN", "SGC"]
+    layers_same, layers_diff = 2, 3
+
+    if setting == "I":
+        arch_Fs, arch_Find = overlapping, overlapping
+        nFs, nFind = layers_same, layers_same
+    elif setting == "II":
+        arch_Fs, arch_Find = overlapping, overlapping
+        nFs, nFind = layers_diff, layers_same
+    elif setting == "III":
+        arch_Fs, arch_Find = disjoint, overlapping
+        nFs, nFind = layers_same, layers_same
+    elif setting == "IV":
+        arch_Fs, arch_Find = disjoint, overlapping
+        nFs, nFind = layers_diff, layers_same
+    else:
+        raise ValueError("Invalid setting")
+
+    target = train_model(build_model("GCN", in_dim, out_dim, 2), data, train_mask, device=device)  # owner's target model F
+
+    Fs = [train_model(build_model(a, in_dim, out_dim, nFs), data, train_mask, device=device)
+          for a in arch_Fs]
+    set_seed(123 if condition !=
"CondA ✓" else 0) + Fs_star = [train_model(build_model(a, in_dim, out_dim, nFs), data, train_mask, device=device) + for a in arch_Fs] + Find = [train_model(build_model(a, in_dim, out_dim, nFind), data, train_mask, device=device) + for a in arch_Find] + + num_queries = max(1, int(0.1 * data.num_nodes)) + query_nodes = torch.randperm(data.num_nodes, device=device)[:num_queries] + thr = tune_threshold(Fs_star, Fs, Find, data, query_nodes, device=device) + + true_labels, pred_labels = [], [] + for model in Fs + Find: + for star in Fs_star: + sim = cosine_similarity( + get_posteriors(model, data, query_nodes, device=device), + get_posteriors(star, data, query_nodes, device=device) + ).mean() + true_labels.append(1 if model in Fs else 0) + pred_labels.append(1 if sim > thr else 0) + return compute_metrics(true_labels, pred_labels) + + +# ------------------------------------------------ +# Multi-seed aggregation +# ------------------------------------------------ +def run_multi(dataset_cls, condition, setting, device=None, seeds=(0, 1, 2, 3, 4)): + device = device or get_device() + all_fpr, all_fnr, all_acc = [], [], [] + for seed in seeds: + set_seed(seed) + FPR, FNR, ACC = run_bgrove_experiment(dataset_cls, condition, setting, device) + all_fpr.append(FPR) + all_fnr.append(FNR) + all_acc.append(ACC) + fmt = lambda arr: f"{np.mean(arr):.2f} ± {np.std(arr):.2f}" + return fmt(all_fpr), fmt(all_fnr), fmt(all_acc) + + +# ------------------------------------------------ +# Entry Point +# ------------------------------------------------ +if __name__ == "__main__": + device = get_device() + print(f"Using device: {device}") + + datasets = [Cora, CiteSeer, PubMed, DBLP, Amazon] + conditions = ["CondA ✓", "CondA ✗"] + settings = ["I", "II", "III", "IV"] + + total = len(datasets) * len(conditions) * len(settings) + results = {} + count = 0 + + for DatasetClass in datasets: + for cond in conditions: + for setting in settings: + count += 1 + print(f"\n=== [{count}/{total}] {DatasetClass.__name__}, {cond}, Setting {setting} ===") + FPR, FNR, ACC = run_multi(DatasetClass, cond, setting, device) + results[(DatasetClass.__name__, cond, setting)] = [FPR, FNR, ACC] + + df = pd.DataFrame.from_dict(results, orient="index", columns=["FPR (%)", "FNR (%)", "ACC (%)"]) + df.index = pd.MultiIndex.from_tuples(df.index, names=["Dataset", "Condition", "Setting"]) + + print("\n=== Table 4: BGrOVe Results (mean ± std) ===") + print(df) + os.makedirs("results", exist_ok=True) + out_path = "results/BGrOVe_table4.csv" + df.to_csv(out_path) + print(f"\n✅ Results saved to {out_path}") diff --git a/implementation/run_table5_full.py b/implementation/run_table5_full.py new file mode 100644 index 0000000..f251272 --- /dev/null +++ b/implementation/run_table5_full.py @@ -0,0 +1,288 @@ +# run_table5_full.py +# Rewritten to reproduce Figure 3 & Table 5 from Zhou et al. 
(2024) with aggregation + stability fixes
+
+import os, random, numpy as np, pandas as pd, sys
+import torch, torch.nn as nn, torch.nn.functional as F
+from torch_geometric.datasets import Planetoid, Amazon, CitationFull
+from torch_geometric.data import Data
+from sklearn.model_selection import train_test_split
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from pygip.models.nn.pyg_backbones import GCN, GAT, GraphSAGE, GIN, SGC
+
+# ----------------------------
+# Config
+# ----------------------------
+SEEDS = [0, 1]
+NUM_INDEP = 3  # fewer independent models
+NUM_SURR = 3   # fewer surrogates
+MODEL_TRAIN_EPOCHS = 40
+SURR_TRAIN_EPOCHS = 40
+COWN_TRAIN_EPOCHS = 20
+MASK_RATIOS = [0.0, 0.1, 0.2, 0.4]
+
+
+# ----------------------------
+# Helpers
+# ----------------------------
+def set_seed(seed=0):
+    random.seed(seed); np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+def load_dataset(name, device="cpu"):
+    lname = name.lower()
+    if lname in ["pubmed", "cora", "citeseer"]:
+        dataset = Planetoid(root=f"data/{name}", name=name)
+        data = dataset[0].to(device)
+    elif "amazon" in lname:
+        sub = "Photo" if "photo" in lname else "Computers"
+        dataset = Amazon(root=f"data/{lname}", name=sub)
+        data = dataset[0].to(device)
+    elif lname in ["dblp", "db_lp", "db-lp"]:
+        dataset = CitationFull(root="data/dblp", name="dblp")
+        data = dataset[0].to(device)
+    else:
+        raise ValueError(f"Unknown dataset {name}")
+    return data, dataset

+def split_nodes(num_nodes, ratios=(0.3, 0.3, 0.3, 0.1), seed=0):
+    rng = np.random.RandomState(seed)
+    perm = rng.permutation(num_nodes)
+    sizes = [int(r * num_nodes) for r in ratios]
+    sizes[-1] = num_nodes - sum(sizes[:-1])
+    splits, names, start = {}, ["train", "dshadow", "dsurr", "dtest"], 0
+    for name, sz in zip(names, sizes):
+        idx = perm[start:start + sz]
+        mask = torch.zeros(num_nodes, dtype=torch.bool); mask[idx] = True
+        splits[name] = mask; start += sz
+    return splits
+
+def filter_edges_to_mask(data, mask):
+    ei = data.edge_index; mask = mask.to(ei.device)
+    keep = ((mask[ei[0]] == True) & (mask[ei[1]] == True))
+    return ei[:, keep]
+
+def mask_features_global(data, mask_ratio=0.1, seed=0):
+    x = data.x.clone(); num_feats = x.size(1)
+    # mask_ratio=0.0 is the unmasked baseline and must leave features untouched
+    k = int(mask_ratio * num_feats)
+    rng = np.random.RandomState(seed)
+    feat_idx = rng.choice(num_feats, k, replace=False) if k > 0 else np.array([], dtype=int)
+    x[:, feat_idx] = 0.0
+    data2 = Data(x=x, edge_index=data.edge_index.clone(), y=data.y.clone())
+    return data2, feat_idx
+
+# ----------------------------
+# Models & Training
+# ----------------------------
+def build_model(model_type, in_dim, out_dim, layers=2):
+    # SGC and GIN use different constructor signatures than the other backbones.
+    if model_type == "SGC":
+        return SGC(in_dim, out_dim, K=layers)
+    if model_type == "GIN":
+        return GIN(in_dim, out_dim, num_layers=layers)
+    cls_map = {"GCN": GCN, "GraphSAGE": GraphSAGE, "GAT": GAT}
+    return cls_map[model_type](in_channels=in_dim, out_channels=out_dim, num_layers=layers)
+
+def train_model(model, data, train_mask, epochs=200, lr=0.01, device="cpu"):
+    model = model.to(device); data = data.to(device)
+    opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4)
+    for _ in range(epochs):
+        model.train(); opt.zero_grad()
+        out = model(data.x, data.edge_index)
+        loss = F.cross_entropy(out[train_mask], data.y[train_mask])
+        loss.backward(); opt.step()
+    return model
+
+def train_with_soft_labels(model, data, train_mask, soft_targets, epochs=200,
lr=0.01, device="cpu"): + model = model.to(device); data = data.to(device) + soft_targets = soft_targets.to(device) + opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4) + for _ in range(epochs): + model.train(); opt.zero_grad() + out = F.log_softmax(model(data.x, data.edge_index), dim=1) + loss = F.kl_div(out[train_mask], soft_targets[train_mask], reduction='batchmean') + loss.backward(); opt.step() + return model + +def compute_accuracy(model, data, mask): + model.eval() + with torch.no_grad(): + logits = model(data.x, data.edge_index) + pred = logits.argmax(dim=1) + return (pred[mask] == data.y[mask]).float().mean().item() * 100 + +def compute_fidelity(model, target, data, mask): + model.eval(); target.eval() + with torch.no_grad(): + pred_m = model(data.x, data.edge_index).argmax(dim=1) + pred_t = target(data.x, data.edge_index).argmax(dim=1) + return (pred_m[mask] == pred_t[mask]).float().mean().item() * 100 + +# ---------------------------- +# Holistic vectors & C_own +# ---------------------------- +def model_to_vector_probs(model, data, node_order=None): + model.eval() + with torch.no_grad(): + probs = F.softmax(model(data.x, data.edge_index), dim=1).cpu() + if node_order is None: + node_order = torch.arange(probs.size(0)) + return probs[node_order].reshape(-1).numpy() + +class COwn(nn.Module): + def __init__(self, input_dim): + super().__init__() + self.net = nn.Sequential( + nn.Linear(input_dim, 128), nn.ReLU(), + nn.Linear(128, 64), nn.ReLU(), + nn.Linear(64, 2) + ) + def forward(self, x): return self.net(x) + +# ---------------------------- +# Settings mapping (I–IV) +# ---------------------------- +def get_setting_architectures(setting): + overlapping, disjoint = ["GCN","GAT","GraphSAGE"], ["GIN","SGC"] + l_same, l_diff = 2, 3 + if setting == "I": Fs, Find, lFs, lFind = overlapping, overlapping, l_same, l_same + elif setting == "II": Fs, Find, lFs, lFind = overlapping, overlapping, l_diff, l_same + elif setting == "III": Fs, Find, lFs, lFind = disjoint, overlapping, l_same, l_same + elif setting == "IV": Fs, Find, lFs, lFind = disjoint, overlapping, l_diff, l_same + else: raise ValueError("Invalid setting") + return Fs, Find, lFs, lFind + +# ---------------------------- +# Main experiment (Table 5 / Fig 3) +# ---------------------------- +def run_table5_full(dataset_name, setting="I", inductive=False, device="cpu"): + data_orig, dataset = load_dataset(dataset_name, device=device) + in_dim, out_dim = dataset.num_features, dataset.num_classes + Fs, Find, lFs, lFind = get_setting_architectures(setting) + + results = [] + for seed in SEEDS: + set_seed(seed) + splits = split_nodes(data_orig.num_nodes, seed=seed) + node_order = torch.where(splits["train"])[0] + + # baseline target + base_model = build_model("GCN", in_dim, out_dim, 2) + base_model = train_model(base_model, data_orig, splits["train"], + epochs=MODEL_TRAIN_EPOCHS, device=device) + base_acc = compute_accuracy(base_model, data_orig, splits["dtest"]) + + for mask_ratio in MASK_RATIOS: + data_masked, _ = mask_features_global(data_orig, mask_ratio, seed=seed) + + # train masked target + tgt = build_model("GCN", in_dim, out_dim, 2) + tgt = train_model(tgt, data_masked, splits["train"], epochs=MODEL_TRAIN_EPOCHS, device=device) + tgt_acc = compute_accuracy(tgt, data_masked, splits["dtest"]) + drop = base_acc - tgt_acc + print(f"[{dataset_name}-{setting}-seed{seed}] Mask={mask_ratio:.2f}, acc={tgt_acc:.2f}, drop={drop:.2f}") + + # Independents + indep_vecs, indep_accs = [], [] + for arch in Find: + for j in 
range(NUM_INDEP): + m = build_model(arch, in_dim, out_dim, lFind) + m = train_model(m, data_masked, splits["train"], epochs=MODEL_TRAIN_EPOCHS, device=device) + indep_accs.append(compute_accuracy(m, data_masked, splits["dtest"])) + indep_vecs.append(model_to_vector_probs(m, data_masked, node_order)) + + # Surrogates + with torch.no_grad(): + soft_all = F.softmax(tgt(data_masked.x, data_masked.edge_index), dim=1).cpu() + + surr_vecs, surr_accs, surr_fids = [], [], [] + for arch in Fs: + for j in range(NUM_SURR): + m = build_model(arch, in_dim, out_dim, lFs) + m = train_with_soft_labels(m, data_masked, splits["train"], soft_all, + epochs=SURR_TRAIN_EPOCHS, device=device) + surr_accs.append(compute_accuracy(m, data_masked, splits["dtest"])) + surr_fids.append(compute_fidelity(m, tgt, data_masked, splits["dtest"])) + surr_vecs.append(model_to_vector_probs(m, data_masked, node_order)) + + # Ownership classifier (full batch training for stability) + X = np.vstack(indep_vecs + surr_vecs) + y = np.array([0]*len(indep_vecs) + [1]*len(surr_vecs)) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.3, stratify=y, random_state=seed + ) + cown = COwn(X.shape[1]).to(device) + opt = torch.optim.Adam(cown.parameters(), lr=0.001, weight_decay=1e-4) + X_train_t, y_train_t = torch.tensor(X_train,dtype=torch.float32,device=device), torch.tensor(y_train,dtype=torch.long,device=device) + X_test_t, y_test_t = torch.tensor(X_test,dtype=torch.float32,device=device), torch.tensor(y_test,dtype=torch.long,device=device) + + for epoch in range(COWN_TRAIN_EPOCHS): + cown.train() + out = cown(X_train_t) + loss = F.cross_entropy(out, y_train_t) + opt.zero_grad(); loss.backward(); opt.step() + + with torch.no_grad(): + preds = cown(X_test_t).argmax(dim=1).cpu().numpy() + c_acc = (preds == y_test).mean()*100 + print(f"[{dataset_name}-{setting}-seed{seed}] C_own acc={c_acc:.2f}") + + # save + results.append({ + "dataset": dataset_name, + "setting": setting, + "mode": "Inductive" if inductive else "Transductive", + "seed": seed, + "mask_ratio": mask_ratio, + "target_acc": tgt_acc, + "indep_acc_mean": np.mean(indep_accs), + "surr_acc_mean": np.mean(surr_accs), + "surr_fid_mean": np.mean(surr_fids), + "cown_acc": c_acc + }) + + return pd.DataFrame(results) + +# ---------------------------- +# Driver +# ---------------------------- +if __name__ == "__main__": + os.makedirs("results", exist_ok=True) + datasets, settings = ["Cora","CiteSeer","PubMed","Amazon","dblp"], ["I","II","III","IV"] + device = "cuda" if torch.cuda.is_available() else "cpu" + all_results = [] + + for ds in datasets: + for st in settings: + for mode in [False, True]: # transductive=False, inductive=True + df = run_table5_full(dataset_name=ds, setting=st, inductive=mode, device=device) + all_results.append(df) + + all_results = pd.concat(all_results, ignore_index=True) + all_results.to_csv("results/all_results_per_seed.csv", index=False) + + # --- Aggregation for analyze_tables_extended.py --- + agg = all_results.groupby(["dataset","setting","mode"]).agg({ + "target_acc": ["mean","std"], + "indep_acc_mean": ["mean","std"], + "surr_acc_mean": ["mean","std"], + "surr_fid_mean": ["mean","std"], + "cown_acc": ["mean","std"] + }).reset_index() + + agg.columns = [ + "dataset","setting","mode", + "target_acc_mean","target_acc_std", + "indep_acc_mean","indep_acc_std", + "surr_acc_mean","surr_acc_std", + "surr_fid_mean","surr_fid_std", + "cown_acc_mean","cown_acc_std" + ] + agg.to_csv("results/table5_all_results.csv", index=False) + + print("✅ 
Saved results/all_results_per_seed.csv and results/table5_all_results.csv (aggregated)")
diff --git a/pygip/datasets/pyg_datasets.py b/pygip/datasets/pyg_datasets.py
new file mode 100644
index 0000000..a875558
--- /dev/null
+++ b/pygip/datasets/pyg_datasets.py
@@ -0,0 +1,181 @@
+import torch
+# Alias the PyG dataset classes so the wrapper classes below can reuse their names.
+from torch_geometric.datasets import (
+    Planetoid,
+    Amazon as AmazonPyG,
+    Coauthor,
+    Flickr as FlickrPyG,
+    Reddit as RedditPyG,
+    TUDataset,
+    FacebookPagePage,
+    LastFMAsia,
+    PolBlogs as PolBlogsPyG,
+    CitationFull,
+)
+
+# ----------------------------
+# Base Dataset Wrapper
+# ----------------------------
+class BasePyGDataset:
+    def __init__(self, dataset, data):
+        self.graph_dataset = dataset
+        self.graph_data = data
+        self.num_nodes = data.num_nodes
+        self.num_features = dataset.num_node_features
+        self.num_classes = dataset.num_classes
+
+    def _generate_masks_by_classes(self, num_class_samples=100, val_count=500, test_count=1000, seed=42):
+        """Generate train/val/test masks by selecting a fixed number of nodes per class."""
+        num_nodes = self.graph_data.num_nodes
+        labels = self.graph_data.y
+        num_classes = int(labels.max().item()) + 1
+
+        used_mask = torch.zeros(num_nodes, dtype=torch.bool)
+        generator = torch.Generator().manual_seed(seed)
+        train_idx_parts = []
+
+        # train set
+        for c in range(num_classes):
+            class_idx = (labels == c).nonzero(as_tuple=True)[0]
+            if class_idx.numel() == 0:
+                continue
+            perm = class_idx[torch.randperm(class_idx.size(0), generator=generator)]
+            n_select = min(num_class_samples, perm.size(0))
+            selected = perm[:n_select]
+            train_idx_parts.append(selected)
+            used_mask[selected] = True
+
+        if len(train_idx_parts) == 0:
+            raise ValueError("No training samples available.")
+
+        train_idx = torch.cat(train_idx_parts, dim=0)
+
+        # val set
+        remaining_idx = (~used_mask).nonzero(as_tuple=True)[0]
+        remaining_perm = remaining_idx[torch.randperm(remaining_idx.size(0), generator=generator)]
+        val_take = min(val_count, remaining_perm.size(0))
+        val_idx = remaining_perm[:val_take]
+        used_mask[val_idx] = True
+
+        # test set
+        remaining_idx = (~used_mask).nonzero(as_tuple=True)[0]
+        test_take = min(test_count, remaining_idx.size(0))
+        test_idx = remaining_idx[:test_take]
+
+        self.graph_data.train_mask = self._index_to_mask(train_idx, num_nodes)
+        self.graph_data.val_mask = self._index_to_mask(val_idx, num_nodes)
+        self.graph_data.test_mask = self._index_to_mask(test_idx, num_nodes)
+
+    def _index_to_mask(self, index: torch.Tensor, size: int):
+        mask = torch.zeros(size, dtype=torch.bool)
+        mask[index] = True
+        return mask
+
+# ----------------------------
+# Datasets
+# ----------------------------
+class Cora(BasePyGDataset):
+    def __init__(self, path="./data"):
+        dataset = Planetoid(root=path, name="Cora")
+        super().__init__(dataset, dataset[0])
+        self.api_type = "pyg"  # required for CustomAttack
+
+
+class CiteSeer(BasePyGDataset):
+    def __init__(self, path="./data"):
+        dataset = Planetoid(root=path, name="CiteSeer")
+        super().__init__(dataset, dataset[0])
+        self.api_type = "pyg"
+
+
+class PubMed(BasePyGDataset):
+    def __init__(self, path="./data"):
+        dataset = Planetoid(root=path, name="PubMed")
+        super().__init__(dataset, dataset[0])
+        self.api_type = "pyg"
+
+
+class DBLP(BasePyGDataset):
+    def __init__(self, path="./data"):
+        # CitationFull provides the homogeneous DBLP citation graph (no masks),
+        # matching load_dataset() in run_table5_full.py.
+        dataset = CitationFull(root=path, name="DBLP")
+        super().__init__(dataset, dataset[0])
+        self._generate_masks_by_classes()
+        self.api_type = "pyg"
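+
+# Usage sketch (assumed workflow, mirroring run_bgrove.py): every wrapper exposes
+# the same attributes regardless of the underlying PyG dataset, e.g.
+#   ds = Cora(path="./data")
+#   data = ds.graph_data          # torch_geometric Data with train/val/test masks
+#   print(ds.num_features, ds.num_classes)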
path="./data"): + dataset = Amazon(root=path, name="Computers") + super().__init__(dataset, dataset[0]) + self._generate_masks_by_classes() + self.api_type = "pyg" + + +class Photo(BasePyGDataset): + def __init__(self, path="./data"): + dataset = Amazon(root=path, name="Photo") + super().__init__(dataset, dataset[0]) + self._generate_masks_by_classes() + self.api_type = "pyg" + + +class CoauthorCS(BasePyGDataset): + def __init__(self, path="./data"): + dataset = Coauthor(root=path, name="CS") + super().__init__(dataset, dataset[0]) + self._generate_masks_by_classes() + self.api_type = "pyg" + + +class CoauthorPhysics(BasePyGDataset): + def __init__(self, path="./data"): + dataset = Coauthor(root=path, name="Physics") + super().__init__(dataset, dataset[0]) + self._generate_masks_by_classes() + self.api_type = "pyg" + + +class ENZYMES(BasePyGDataset): + def __init__(self, path="./data"): + dataset = TUDataset(root=path, name="ENZYMES") + super().__init__(dataset, dataset[0]) + self.api_type = "pyg" + + +class Facebook(BasePyGDataset): + def __init__(self, path="./data"): + dataset = FacebookPagePage(root=path) + super().__init__(dataset, dataset[0]) + self.api_type = "pyg" + + +class Flickr(BasePyGDataset): + def __init__(self, path="./data"): + dataset = Flickr(root=path) + super().__init__(dataset, dataset[0]) + self.api_type = "pyg" + + +class PolBlogs(BasePyGDataset): + def __init__(self, path="./data"): + dataset = PolBlogsPyG(root=path) + super().__init__(dataset, dataset[0]) + self._generate_masks_by_classes() + self.api_type = "pyg" + + +class LastFM(BasePyGDataset): + def __init__(self, path="./data"): + dataset = LastFMAsia(root=path) + super().__init__(dataset, dataset[0]) + self.api_type = "pyg" + + +class Reddit(BasePyGDataset): + def __init__(self, path="./data"): + dataset = Reddit(root=path) + super().__init__(dataset, dataset[0]) + self.api_type = "pyg" diff --git a/pygip/models/nn/pyg_backbones.py b/pygip/models/nn/pyg_backbones.py new file mode 100644 index 0000000..bad65ec --- /dev/null +++ b/pygip/models/nn/pyg_backbones.py @@ -0,0 +1,96 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch_geometric.nn import GCNConv, SAGEConv, GATConv, GINConv, SGConv + + + +# ---------------------------- +# GCN +# ---------------------------- +class GCN(nn.Module): + def __init__(self, in_channels, out_channels, hidden=64, num_layers=2): + super().__init__() + self.convs = nn.ModuleList([GCNConv(in_channels, hidden)]) + for _ in range(num_layers - 2): + self.convs.append(GCNConv(hidden, hidden)) + self.convs.append(GCNConv(hidden, out_channels)) + + def forward(self, x, edge_index): + for conv in self.convs[:-1]: + x = F.relu(conv(x, edge_index)) + return self.convs[-1](x, edge_index) + +# ---------------------------- +# GraphSAGE +# ---------------------------- +class GraphSAGE(nn.Module): + def __init__(self, in_channels, out_channels, hidden=64, num_layers=2): + super().__init__() + self.convs = nn.ModuleList([SAGEConv(in_channels, hidden)]) + for _ in range(num_layers - 2): + self.convs.append(SAGEConv(hidden, hidden)) + self.convs.append(SAGEConv(hidden, out_channels)) + + def forward(self, x, edge_index): + for conv in self.convs[:-1]: + x = F.relu(conv(x, edge_index)) + return self.convs[-1](x, edge_index) + +# ---------------------------- +# GAT +# ---------------------------- +class GAT(nn.Module): + def __init__(self, in_channels, out_channels, hidden=64, num_layers=2, heads=4): + super().__init__() + self.convs = 
nn.ModuleList([GATConv(in_channels, hidden, heads=heads)]) + for _ in range(num_layers - 2): + self.convs.append(GATConv(hidden * heads, hidden, heads=heads)) + self.convs.append(GATConv(hidden * heads, out_channels, heads=1)) + + def forward(self, x, edge_index): + for conv in self.convs[:-1]: + x = F.elu(conv(x, edge_index)) + return self.convs[-1](x, edge_index) + +# ---------------------------- +# GIN +# ---------------------------- +class GIN(nn.Module): + def __init__(self, in_dim, out_dim, hid_dim=64, num_layers=2): + super().__init__() + nn1 = nn.Sequential( + nn.Linear(in_dim, hid_dim), + nn.ReLU(), + nn.Linear(hid_dim, hid_dim) + ) + self.convs = nn.ModuleList([GINConv(nn1)]) + for _ in range(num_layers - 2): + nnk = nn.Sequential( + nn.Linear(hid_dim, hid_dim), + nn.ReLU(), + nn.Linear(hid_dim, hid_dim) + ) + self.convs.append(GINConv(nnk)) + nn_last = nn.Sequential( + nn.Linear(hid_dim, hid_dim), + nn.ReLU(), + nn.Linear(hid_dim, out_dim) + ) + self.convs.append(GINConv(nn_last)) + + def forward(self, x, edge_index): + for conv in self.convs[:-1]: + x = F.relu(conv(x, edge_index)) + return self.convs[-1](x, edge_index) + +# ---------------------------- +# SGC +# ---------------------------- +class SGC(nn.Module): + def __init__(self, in_dim, out_dim, K=2): + super().__init__() + self.conv = SGConv(in_dim, out_dim, K=K) + + def forward(self, x, edge_index): + return self.conv(x, edge_index) diff --git a/results/BGrOVe_table4.csv b/results/BGrOVe_table4.csv new file mode 100644 index 0000000..41c1245 --- /dev/null +++ b/results/BGrOVe_table4.csv @@ -0,0 +1,41 @@ +Dataset,Condition,Setting,FPR (%),FNR (%),ACC (%) +Cora,CondA ✓,I,91.11 ± 17.78,4.44 ± 8.89,52.22 ± 4.44 +Cora,CondA ✓,II,100.00 ± 0.00,0.00 ± 0.00,50.00 ± 0.00 +Cora,CondA ✓,III,0.00 ± 0.00,70.00 ± 10.00,72.00 ± 4.00 +Cora,CondA ✓,IV,0.00 ± 0.00,70.00 ± 10.00,72.00 ± 4.00 +Cora,CondA ✗,I,86.67 ± 10.89,6.67 ± 5.44,53.33 ± 2.72 +Cora,CondA ✗,II,100.00 ± 0.00,0.00 ± 0.00,50.00 ± 0.00 +Cora,CondA ✗,III,0.00 ± 0.00,70.00 ± 10.00,72.00 ± 4.00 +Cora,CondA ✗,IV,0.00 ± 0.00,70.00 ± 10.00,72.00 ± 4.00 +CiteSeer,CondA ✓,I,71.11 ± 29.48,20.00 ± 24.75,54.44 ± 4.16 +CiteSeer,CondA ✓,II,100.00 ± 0.00,0.00 ± 0.00,50.00 ± 0.00 +CiteSeer,CondA ✓,III,0.00 ± 0.00,75.00 ± 0.00,70.00 ± 0.00 +CiteSeer,CondA ✓,IV,0.00 ± 0.00,70.00 ± 10.00,72.00 ± 4.00 +CiteSeer,CondA ✗,I,100.00 ± 0.00,0.00 ± 0.00,50.00 ± 0.00 +CiteSeer,CondA ✗,II,100.00 ± 0.00,0.00 ± 0.00,50.00 ± 0.00 +CiteSeer,CondA ✗,III,0.00 ± 0.00,75.00 ± 0.00,70.00 ± 0.00 +CiteSeer,CondA ✗,IV,0.00 ± 0.00,75.00 ± 0.00,70.00 ± 0.00 +Amazon,CondA ✓,I,71.11 ± 36.92,20.00 ± 29.31,54.44 ± 4.16 +Amazon,CondA ✓,II,53.33 ± 42.40,26.67 ± 25.92,60.00 ± 8.89 +Amazon,CondA ✓,III,0.00 ± 0.00,30.00 ± 10.00,88.00 ± 4.00 +Amazon,CondA ✓,IV,3.33 ± 6.67,35.00 ± 12.25,84.00 ± 4.90 +Amazon,CondA ✗,I,93.33 ± 8.89,0.00 ± 0.00,53.33 ± 4.44 +Amazon,CondA ✗,II,24.44 ± 38.75,55.56 ± 31.43,60.00 ± 6.48 +Amazon,CondA ✗,III,0.00 ± 0.00,35.00 ± 12.25,86.00 ± 4.90 +Amazon,CondA ✗,IV,0.00 ± 0.00,40.00 ± 12.25,84.00 ± 4.90 +DBLP,CondA ✓,I,91.11 ± 12.96,4.44 ± 8.89,52.22 ± 2.72 +DBLP,CondA ✓,II,100.00 ± 0.00,0.00 ± 0.00,50.00 ± 0.00 +DBLP,CondA ✓,III,6.67 ± 8.16,60.00 ± 20.00,72.00 ± 4.00 +DBLP,CondA ✓,IV,13.33 ± 12.47,45.00 ± 18.71,74.00 ± 4.90 +DBLP,CondA ✗,I,100.00 ± 0.00,0.00 ± 0.00,50.00 ± 0.00 +DBLP,CondA ✗,II,91.11 ± 17.78,6.67 ± 13.33,51.11 ± 2.22 +DBLP,CondA ✗,III,0.00 ± 0.00,75.00 ± 0.00,70.00 ± 0.00 +DBLP,CondA ✗,IV,6.67 ± 8.16,30.00 ± 24.49,84.00 ± 10.20 +PubMed,CondA ✓,I,97.78 ± 4.44,0.00 ± 0.00,51.11 ± 2.22 
+PubMed,CondA ✓,II,100.00 ± 0.00,0.00 ± 0.00,50.00 ± 0.00
+PubMed,CondA ✓,III,0.00 ± 0.00,75.00 ± 0.00,70.00 ± 0.00
+PubMed,CondA ✓,IV,0.00 ± 0.00,75.00 ± 0.00,70.00 ± 0.00
+PubMed,CondA ✗,I,100.00 ± 0.00,0.00 ± 0.00,50.00 ± 0.00
+PubMed,CondA ✗,II,100.00 ± 0.00,0.00 ± 0.00,50.00 ± 0.00
+PubMed,CondA ✗,III,0.00 ± 0.00,75.00 ± 0.00,70.00 ± 0.00
+PubMed,CondA ✗,IV,0.00 ± 0.00,75.00 ± 0.00,70.00 ± 0.00
\ No newline at end of file
diff --git a/results/CORA_table3.csv b/results/CORA_table3.csv
new file mode 100644
index 0000000..4bb6274
--- /dev/null
+++ b/results/CORA_table3.csv
@@ -0,0 +1,7 @@
+Dataset,Model,Setting,TCA,ECA,TBA,EBA
+Cora,GAT,With Backdoor,73.97540808,81.65983558,4.629629478,4.629629478
+Cora,GAT,Without Backdoor,79.30327654,79.50819731,77.7777791,83.33333135
+Cora,GCN,With Backdoor,77.76639462,82.17213154,5.555555597,9.259258956
+Cora,GCN,Without Backdoor,82.78688788,84.93852615,86.11111045,85.18518806
+Cora,GraphSAGE,With Backdoor,80.84016442,84.22130942,4.629629478,0.9259259328
+Cora,GraphSAGE,Without Backdoor,82.99180269,84.73360538,85.18518806,87.9629612
\ No newline at end of file
diff --git a/results/CiteSeer_Table3.csv b/results/CiteSeer_Table3.csv
new file mode 100644
index 0000000..a4cf971
--- /dev/null
+++ b/results/CiteSeer_Table3.csv
@@ -0,0 +1,7 @@
+Dataset,Model,Setting,TCA,ECA,TBA,EBA
+CiteSeer,GCN,With Backdoor,64.77462649,68.53088737,20.30075192,9.022556245
+CiteSeer,GCN,Without Backdoor,71.45242095,69.61602569,69.17293072,65.41353464
+CiteSeer,GAT,With Backdoor,68.11352372,67.19532609,28.57142985,8.270676434
+CiteSeer,GAT,Without Backdoor,72.37061858,69.44907904,70.67669034,69.92481351
+CiteSeer,GraphSAGE,With Backdoor,71.70283794,70.86811066,22.55639136,15.78947306
+CiteSeer,GraphSAGE,Without Backdoor,74.62437153,71.53589129,73.68420959,71.42857313
\ No newline at end of file
diff --git a/results/desc b/results/desc
new file mode 100644
index 0000000..9b4db40
--- /dev/null
+++ b/results/desc
@@ -0,0 +1 @@
+This folder consists of all the experiments carried out.
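A small illustrative helper (hypothetical file, not part of the original experiments) showing how the aggregated results CSV below is meant to be consumed:

diff --git a/examples/inspect_results.py b/examples/inspect_results.py
new file mode 100644
--- /dev/null
+++ b/examples/inspect_results.py
@@ -0,0 +1,17 @@
+"""
+Example Script: inspect_results.py (illustrative sketch only)
+--------------------------------------
+Loads results/table5_all_results.csv produced by run_table5_full.py and prints
+the mean C_own accuracy per dataset and setting, mirroring how adversial.py and
+adversial_table8.py consume the same file.
+"""
+
+import pandas as pd
+
+def main():
+    # Columns follow run_table5_full.py's aggregation: *_mean / *_std pairs.
+    df = pd.read_csv("results/table5_all_results.csv")
+    print(df.groupby(["dataset", "setting"])["cown_acc_mean"].mean())
+
+if __name__ == "__main__":
+    main()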
diff --git a/results/table5_all_results.csv b/results/table5_all_results.csv new file mode 100644 index 0000000..be42d30 --- /dev/null +++ b/results/table5_all_results.csv @@ -0,0 +1,41 @@ +dataset,setting,mode,target_acc_mean,target_acc_std,indep_acc_mean,indep_acc_std,surr_acc_mean,surr_acc_std,surr_fid_mean,surr_fid_std,cown_acc_mean,cown_acc_std +Amazon,I,Inductive,74.72766936,2.433793066,75.99350429,2.807438253,66.12099587,3.767657614,84.28144935,2.742822875,93.75,8.625819492 +Amazon,I,Transductive,74.72766936,2.433793066,75.98846116,2.809416573,66.26724779,3.505791579,84.50839197,2.287750406,93.75,8.625819492 +Amazon,II,Inductive,74.21024069,2.936450688,75.722182,1.483742129,64.10977973,3.810170195,82.16029273,2.239979708,100,0 +Amazon,II,Transductive,74.21024069,2.936450688,75.72319065,1.48381853,64.13096107,3.76414472,82.13205106,1.9391017,100,0 +Amazon,III,Inductive,74.818445,2.776157246,76.05906568,1.315007683,48.65196113,3.906533014,61.48329745,5.397102683,100,0 +Amazon,III,Transductive,74.80936721,2.771182961,76.05503119,1.313337296,48.65196113,3.906533014,61.48934931,5.39143826,100,0 +Amazon,IV,Inductive,77.1060288,4.273100267,75.75849262,1.420200514,50.7035226,5.022975708,62.91454871,6.082537691,97.5,7.071067812 +Amazon,IV,Transductive,77.1060288,4.273100267,75.75748406,1.420005262,50.7035226,5.022975708,62.91454871,6.082537691,97.5,7.071067812 +CiteSeer,I,Inductive,71.77177369594574,1.0647533107843492,72.94377974338002,1.3912004966121483,73.57774749398232,1.1895509263973538,91.66249790125423,0.5610137603513385,91.66666666666667,8.908708063747481 +CiteSeer,I,Transductive,71.77177369594574,1.0647533107843492,72.94377974338002,1.3912004966121483,73.56940582,1.1983429173168936,91.65415623121791,0.5644055196687388,91.66666666666667,8.908708063747481 +CiteSeer,II,Inductive,72.10961133241653,1.1055630562462107,72.92292540272076,1.138527077192637,72.17634485827551,1.5939817451938052,91.49983541833029,0.9707933926959381,93.75,8.625819491779417 +CiteSeer,II,Transductive,72.10961133241653,1.1055630562462107,72.92292540272076,1.138527077192637,72.18468652831183,1.585126582819718,91.51234792338477,0.9698406293019681,91.66666666666667,8.908708063747481 +CiteSeer,III,Inductive,71.84684872627258,0.9186045352785076,73.03970886601343,1.3594631797562717,51.82682887340585,2.433482699252227,60.47297349820534,2.528358882931918,100,0 +CiteSeer,III,Transductive,71.84684872627258,0.9186045352785076,73.03970886601343,1.3594631797562717,51.82682887340585,2.433482699252227,60.47297349820534,2.528358882931918,100,0 +CiteSeer,IV,Inductive,72.40991219878197,1.4574197263651163,72.95212108227942,1.1431189724073934,56.65040258318186,3.119829912164031,66.52277372777462,3.530106445182951,100,0 +CiteSeer,IV,Transductive,72.40991219878197,1.4574197263651163,72.95212108227942,1.1431189724073934,56.64414633065462,3.110560938860846,66.52902998030186,3.5384408830786906,100,0 +Cora,I,Inductive,86.07536629,0.364350057,85.08476276,0.729251325,86.01919835,0.728699547,95.40951807,1.552431607,95.83333333,7.715167498 +Cora,I,Transductive,86.07536629,0.364350057,85.08476276,0.729251325,86.01919835,0.728699547,95.40951807,1.552431607,95.83333333,7.715167498 +Cora,II,Inductive,86.16727814,0.67897562,85.20220543,0.684504016,85.75367555,0.713492315,95.37377523,0.875243012,91.66666667,8.908708064 +Cora,II,Transductive,86.16727814,0.67897562,85.20220543,0.684504016,85.75367555,0.713492315,95.37377523,0.875243012,91.66666667,8.908708064 
+Cora,III,Inductive,85.98345444,0.603706665,85.05412497,0.95587684,59.26776932,2.043234332,62.53829622,2.596340859,100,0 +Cora,III,Transductive,85.98345444,0.603706665,85.05412497,0.95587684,59.26776932,2.043234332,62.53829622,2.596340859,100,0 +Cora,IV,Inductive,85.98345518,1.11923336,85.16646177,0.710374363,66.8734679,2.61199434,70.71844321,2.718749521,100,0 +Cora,IV,Transductive,85.98345518,1.11923336,85.16646177,0.710374363,66.8734679,2.61199434,70.71844321,2.718749521,100,0 +PubMed,I,Inductive,85.47160029411316,1.4055767825866161,86.07800669140286,1.2555878405802159,84.03622731566429,1.5159967661156328,95.70162635710504,0.48851841394759443,100,0 +PubMed,I,Transductive,85.47160029411316,1.4055767825866161,86.07871101962195,1.255610642018456,84.03622731566429,1.5159967661156328,95.70162635710504,0.48851841394759443,100,0 +PubMed,II,Inductive,85.43990552425385,1.5075765891313828,86.10547466410532,1.219525037818393,83.65519858,1.5021227600145137,94.75574476851358,0.5504532852567569,100,0 +PubMed,II,Transductive,85.43990552425385,1.5075765891313828,86.10547466410532,1.219525037818393,83.65449433525403,1.5014299316259458,94.75504044029448,0.5503271029803263,100,0 +PubMed,III,Inductive,85.45892238616943,1.4018459600203355,86.10899606,1.235588847369993,57.03177681813637,1.537385352822437,60.18424490466714,1.7685813325906397,100,0 +PubMed,III,Transductive,85.45892238616943,1.4018459600203355,86.10899606,1.235588847369993,57.03177681813637,1.537385352822437,60.18424490466714,1.7685813325906397,100,0 +PubMed,IV,Inductive,85.37651822,1.4541180433778989,86.08434523145358,1.2317357633422827,63.20359905560811,3.853679200667111,67.01529628,4.251243661477413,100,0 +PubMed,IV,Transductive,85.37651822,1.4541180433778989,86.08434523145358,1.2317357633422827,63.20359905560811,3.853679200667111,67.01529628,4.251243661477413,100,0 +dblp,I,Inductive,84.09667015075684,0.8904874959006944,83.11411357588239,0.6737062534471557,83.48208309875594,1.0747514260566795,94.39433357781834,0.5186943011733857,100,0 +dblp,I,Transductive,84.09667015075684,0.8904874959006944,83.11411357588239,0.6737062534471557,83.48208309875594,1.0747514260566795,94.39433357781834,0.5186943011733857,100,0 +dblp,II,Inductive,84.15303975343704,0.8325283644192363,83.06322404079967,0.7391647255743868,84.17809307575226,0.7964256301882281,94.14928149845865,0.6040526812442378,100,0 +dblp,II,Transductive,84.15303975343704,0.8325283644192363,83.06165817711089,0.7366850732262857,84.17731018529997,0.7951250309841605,94.14849860800638,0.6041019644536465,100,0 +dblp,III,Inductive,84.12485420703888,0.8585653812645303,83.04521698090765,0.7541799999775749,63.776537527640656,5.409483281624359,67.23505876337488,6.011977794715171,100,0 +dblp,III,Transductive,84.12485420703888,0.8585653812645303,83.04756573504872,0.7529822086845728,63.777711925407246,5.410287503708998,67.23153569425146,6.014947800689849,100,0 +dblp,IV,Inductive,84.18826982,0.9237954497679515,83.08592844340536,0.6570379800683244,68.50572787225246,4.590483784023131,71.64364544053872,4.968114182274684,100,0 +dblp,IV,Transductive,84.18826982,0.9237954497679515,83.08514555295308,0.6573444249326462,68.50807654,4.593675097736268,71.64364544053872,4.968114182274684,100,0 diff --git a/results/table6_latest.csv b/results/table6_latest.csv new file mode 100644 index 0000000..dd3a70a --- /dev/null +++ b/results/table6_latest.csv @@ -0,0 +1,41 @@ +Dataset,Setting,Mode,Ori_ACC(%),FPR(%),FNR(%),Fine_ACC(%) +Amazon,I,Inductive,83.48,0,0,100 +Amazon,I,Transductive,88.27,0,0,100 
+Amazon,II,Inductive,85.87,0,0,100 +Amazon,II,Transductive,86.23,0,0,100 +Amazon,III,Inductive,37.51,0,0,100 +Amazon,III,Transductive,37.51,0,0,100 +Amazon,IV,Inductive,37.51,0,0,100 +Amazon,IV,Transductive,37.51,0,0,100 +PubMed,I,Inductive,87.53,0,0,100 +PubMed,I,Transductive,87.44,0,0,100 +PubMed,II,Inductive,87.34,0,0,100 +PubMed,II,Transductive,87.34,0,0,100 +PubMed,III,Inductive,65.32,0,0,100 +PubMed,III,Transductive,65.26,0,0,100 +PubMed,IV,Inductive,83.32,0,0,100 +PubMed,IV,Transductive,83.32,0,0,100 +CiteSeer,I,Inductive,87.38,0,0,100 +CiteSeer,I,Transductive,87.92,0,0,100 +CiteSeer,II,Inductive,85.78,0,0,100 +CiteSeer,II,Transductive,85.81,0,0,100 +CiteSeer,III,Inductive,46.26,0,0,100 +CiteSeer,III,Transductive,46.26,0,0,100 +CiteSeer,IV,Inductive,71.6,0,0,100 +CiteSeer,IV,Transductive,71.6,0,0,100 +Cora,I,Inductive,94.68,0,0,100 +Cora,I,Transductive,94.28,0,0,100 +Cora,II,Inductive,93.57,0,0,100 +Cora,II,Transductive,93.57,0,0,100 +Cora,III,Inductive,43.69,0,0,100 +Cora,III,Transductive,43.69,0,0,100 +Cora,IV,Inductive,60.04,0,0,100 +Cora,IV,Transductive,59.27,0,0,100 +dblp,I,Inductive,87.83,0,0,100 +dblp,I,Transductive,87.62,0,0,100 +dblp,II,Inductive,88.68,0,0,100 +dblp,II,Transductive,88.62,0,0,100 +dblp,III,Inductive,57.05,0,0,100 +dblp,III,Transductive,57,0,0,100 +dblp,IV,Inductive,73.44,0,0,100 +dblp,IV,Transductive,73.44,0,0,100 diff --git a/results/table7.csv b/results/table7.csv new file mode 100644 index 0000000..7fbd4d6 --- /dev/null +++ b/results/table7.csv @@ -0,0 +1,41 @@ +Dataset,Setting,Mode,FPR +dblp,I,Inductive,33.33 ± 0.00 +dblp,I,Transductive,46.67 ± 16.33 +dblp,II,Inductive,60.00 ± 13.33 +dblp,II,Transductive,60.00 ± 13.33 +dblp,III,Inductive,40.00 ± 13.33 +dblp,III,Transductive,40.00 ± 13.33 +dblp,IV,Inductive,33.33 ± 0.00 +dblp,IV,Transductive,33.33 ± 0.00 +Amazon,I,Inductive,46.67 ± 16.33 +Amazon,I,Transductive,46.67 ± 16.33 +Amazon,II,Inductive,46.67 ± 16.33 +Amazon,II,Transductive,33.33 ± 0.00 +Amazon,III,Inductive,33.33 ± 0.00 +Amazon,III,Transductive,33.33 ± 0.00 +Amazon,IV,Inductive,33.33 ± 0.00 +Amazon,IV,Transductive,33.33 ± 0.00 +PubMed,I,Inductive,40.00 ± 13.33 +PubMed,I,Transductive,40.00 ± 13.33 +PubMed,II,Inductive,60.00 ± 13.33 +PubMed,II,Transductive,60.00 ± 13.33 +PubMed,III,Inductive,60.00 ± 13.33 +PubMed,III,Transductive,60.00 ± 13.33 +PubMed,IV,Inductive,60.00 ± 13.33 +PubMed,IV,Transductive,60.00 ± 13.33 +CiteSeer,I,Inductive,33.33 ± 0.00 +CiteSeer,I,Transductive,33.33 ± 0.00 +CiteSeer,II,Inductive,33.33 ± 0.00 +CiteSeer,II,Transductive,33.33 ± 0.00 +CiteSeer,III,Inductive,53.33 ± 16.33 +CiteSeer,III,Transductive,53.33 ± 16.33 +CiteSeer,IV,Inductive,40.00 ± 13.33 +CiteSeer,IV,Transductive,40.00 ± 13.33 +Cora,I,Inductive,60.00 ± 13.33 +Cora,I,Transductive,60.00 ± 13.33 +Cora,II,Inductive,40.00 ± 13.33 +Cora,II,Transductive,40.00 ± 13.33 +Cora,III,Inductive,53.33 ± 16.33 +Cora,III,Transductive,53.33 ± 16.33 +Cora,IV,Inductive,53.33 ± 16.33 +Cora,IV,Transductive,53.33 ± 16.33 diff --git a/results/table8.csv b/results/table8.csv new file mode 100644 index 0000000..b9b25e8 --- /dev/null +++ b/results/table8.csv @@ -0,0 +1,41 @@ +Dataset,Setting,Mode,Ori_ACC(%),FPR(%),FNR(%),Double_ACC(%) +Amazon,I,Inductive,87.19,0.0,0.0,100.0 +Amazon,I,Transductive,87.4,0.0,0.0,100.0 +Amazon,II,Inductive,83.95,0.0,0.0,100.0 +Amazon,II,Transductive,83.97,0.0,0.0,100.0 +Amazon,III,Inductive,37.51,0.0,0.0,100.0 +Amazon,III,Transductive,37.51,0.0,0.0,100.0 +Amazon,IV,Inductive,37.51,0.0,0.0,100.0 +Amazon,IV,Transductive,37.51,0.0,0.0,100.0 
+CiteSeer,I,Inductive,87.29,0.0,0.0,100.0 +CiteSeer,I,Transductive,87.29,0.0,0.0,100.0 +CiteSeer,II,Inductive,85.81,0.0,0.0,100.0 +CiteSeer,II,Transductive,85.81,0.0,0.0,100.0 +CiteSeer,III,Inductive,40.7,0.0,0.0,100.0 +CiteSeer,III,Transductive,42.02,0.0,0.0,100.0 +CiteSeer,IV,Inductive,49.23,0.0,0.0,100.0 +CiteSeer,IV,Transductive,49.23,0.0,0.0,100.0 +Cora,I,Inductive,94.05,0.0,0.0,100.0 +Cora,I,Transductive,94.35,0.0,0.0,100.0 +Cora,II,Inductive,93.17,0.0,0.0,100.0 +Cora,II,Transductive,93.17,0.0,0.0,100.0 +Cora,III,Inductive,30.21,0.0,0.0,100.0 +Cora,III,Transductive,30.21,0.0,0.0,100.0 +Cora,IV,Inductive,42.98,0.0,0.0,100.0 +Cora,IV,Transductive,42.98,0.0,0.0,100.0 +PubMed,I,Inductive,87.47,0.0,0.0,100.0 +PubMed,I,Transductive,87.47,0.0,0.0,100.0 +PubMed,II,Inductive,87.38,0.0,0.0,100.0 +PubMed,II,Transductive,87.38,0.0,0.0,100.0 +PubMed,III,Inductive,68.86,0.0,0.0,100.0 +PubMed,III,Transductive,68.86,0.0,0.0,100.0 +PubMed,IV,Inductive,67.85,0.0,0.0,100.0 +PubMed,IV,Transductive,67.85,0.0,0.0,100.0 +dblp,I,Inductive,88.02,0.0,0.0,100.0 +dblp,I,Transductive,87.99,0.0,0.0,100.0 +dblp,II,Inductive,89.05,0.0,0.0,100.0 +dblp,II,Transductive,89.02,0.0,0.0,100.0 +dblp,III,Inductive,52.08,0.0,0.0,100.0 +dblp,III,Transductive,52.08,0.0,0.0,100.0 +dblp,IV,Inductive,77.62,0.0,0.0,100.0 +dblp,IV,Transductive,77.6,0.0,0.0,100.0
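Unlike tables 6 and 8, which store plain numeric columns (Ori_ACC, FPR, FNR in percent), results/table7.csv stores each FPR cell as a "mean ± std" string, so any downstream analysis has to split it before aggregating. A minimal sketch, again assuming pandas and the repository root as the working directory, not part of the diff:

import pandas as pd

# Table 7 stores FPR as "mean ± std" strings; split them into floats.
df = pd.read_csv("results/table7.csv")
df[["FPR_mean", "FPR_std"]] = df["FPR"].str.split("±", expand=True).astype(float)

# Inductive and transductive rows are near-identical here, so average over
# Mode to get one false-positive-rate estimate per (Dataset, Setting).
print(df.groupby(["Dataset", "Setting"])["FPR_mean"].mean().unstack())

The same loading pattern applies unchanged to results/table6_latest.csv and results/table8.csv, whose columns are already numeric.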