From 0256da3ef1284570c0ac7ac1c096ceafe96bbb4a Mon Sep 17 00:00:00 2001
From: giosem1
Date: Wed, 19 Nov 2025 11:46:26 +0100
Subject: [PATCH] fix: Dario

---
 .gitignore | 2 +-
 .../hyperlink_prediction/loader/dataloader.py | 4 +-
 .../hyperlink_prediction/models/__init__.py | 4 +-
 .../models/hyperlink_prediction_algorithm.py | 252 ++++++++++++++++++
 .../models/hyperlink_prediction_result.py | 24 +-
 hyperbench/pipelines/pipeline.py | 23 +-
 hyperbench/utils/__init__.py | 3 +-
 .../utils/data_and_sampling_selector.py | 16 +-
 tests/methods_test.py | 16 +-
 uv.lock | 4 +-
 10 files changed, 312 insertions(+), 36 deletions(-)

diff --git a/.gitignore b/.gitignore
index 6e9ed75..a94c5de 100644
--- a/.gitignore
+++ b/.gitignore
@@ -179,4 +179,4 @@
 results/*
 !*.gitkeep
 
-.DS_Store
\ No newline at end of file
+*.DS_Store
\ No newline at end of file

diff --git a/hyperbench/hyperlink_prediction/loader/dataloader.py b/hyperbench/hyperlink_prediction/loader/dataloader.py
index 9962436..303c0e2 100644
--- a/hyperbench/hyperlink_prediction/loader/dataloader.py
+++ b/hyperbench/hyperlink_prediction/loader/dataloader.py
@@ -20,10 +20,10 @@ class DatasetLoader(DataLoader):
         **kwargs: Additional arguments for the class.
     """
 
-    def __init__(self, dataset: DatasetHyperGraph, negative_sampling: str, num_node: int, batch_size: int = 1, shuffle: bool = False, **kwargs):
+    def __init__(self, dataset: DatasetHyperGraph, negative_sampling: str, alpha: float, beta: int, num_node: int, batch_size: int = 1, shuffle: bool = False, **kwargs):
         kwargs.pop("collate_fn", None)
 
-        hypergraph_negative = setNegativeSamplingAlgorithm(negative_sampling, num_node).generate(dataset._data.edge_index)
+        hypergraph_negative = setNegativeSamplingAlgorithm(negative_sampling, num_node, alpha, beta).generate(dataset._data.edge_index)
         dataset.edge_index = hypergraph_negative.edge_index
 
         super().__init__(

diff --git a/hyperbench/hyperlink_prediction/models/__init__.py b/hyperbench/hyperlink_prediction/models/__init__.py
index 33a2ab4..4294eb1 100644
--- a/hyperbench/hyperlink_prediction/models/__init__.py
+++ b/hyperbench/hyperlink_prediction/models/__init__.py
@@ -1,9 +1,11 @@
 from .hyperlink_prediction_base import HyperlinkPredictor
-from .hyperlink_prediction_algorithm import CommonNeighbors
+from .hyperlink_prediction_algorithm import CommonNeighbors, NeuralHP, FactorizationMachine
 from .hyperlink_prediction_result import HyperlinkPredictionResult
 
 __all__ = data_classes = [
     'HyperlinkPredictor',
     'CommonNeighbors',
+    'NeuralHP',
+    'FactorizationMachine',
     'HyperlinkPredictionResult'
 ]
\ No newline at end of file

diff --git a/hyperbench/hyperlink_prediction/models/hyperlink_prediction_algorithm.py b/hyperbench/hyperlink_prediction/models/hyperlink_prediction_algorithm.py
index e5b9e1c..331aa0e 100644
--- a/hyperbench/hyperlink_prediction/models/hyperlink_prediction_algorithm.py
+++ b/hyperbench/hyperlink_prediction/models/hyperlink_prediction_algorithm.py
@@ -2,7 +2,9 @@
 from torch import Tensor
 from .hyperlink_prediction_base import HyperlinkPredictor
 from .hyperlink_prediction_result import HyperlinkPredictionResult
+import torch.nn as nn
 
+#TODO: Add PyDoc
 class CommonNeighbors(HyperlinkPredictor):
     def __init__(self, device='cpu'):
         super().__init__(device)
@@ -43,3 +45,253 @@ def predict(self, edge_index: Tensor):
             edge_index=new_edges,
             device=self.device
         )
+
+class NeuralHP(HyperlinkPredictor, nn.Module):
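+    """Neural Hyperlink Predictor (NHP).
+
+    Adapted from the code of the paper:
+    https://malllabiisc.github.io/publications/papers/nhp_cikm20.pdf
+
+    This docstring is a sketch inferred from the code below; the parameter
+    semantics are assumptions based on the paper:
+        d: input node-feature dimension (overwritten from X in fit()).
+        h: hidden embedding dimension.
+        Type: 's' for the standard variant, 'd' for the "direction" variant
+            that adds the bilinear head self.BL.
+        score: pooling used to score a hyperlink, 'mean' or 'maxmin'.
+        lam: weight of the auxiliary bilinear loss term when Type == 'd'.
+    """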
+    def __init__(self, d=128, h=64, Type='s', score='mean', lam=1.0, device='cpu'):
+        HyperlinkPredictor.__init__(self, device)
+        nn.Module.__init__(self)
+
+        self.H = None
+        self.X = None
+        self.device = device
+        self.d, self.h = d, h
+        self.Type, self.score, self.lam = Type, score, lam
+
+        # Layers
+        self.loop = None  # self-loop
+        self.GCN = None   # hyperlink-aware GCN
+        self.INT = nn.Linear(h, 1)
+        if Type == "d":
+            self.BL = nn.Linear(h, h)
+
+        self.to(self.device)
+
+    def fit(self, X, y, edge_index, *args, **kwargs):
+        self.train()
+        if self.loop is None:
+            self.d = X.shape[1]
+            # Lazily created layers must follow the model onto its device.
+            self.loop = nn.Linear(self.d, self.h).to(self.device)
+            self.GCN = nn.Linear(self.d, self.h).to(self.device)
+
+        num_nodes = int(edge_index[0].max().item()) + 1
+        num_hyperlinks = int(edge_index[1].max().item()) + 1
+
+        H = torch.sparse_coo_tensor(
+            edge_index,
+            torch.ones(edge_index.shape[1], device=self.device),
+            (num_nodes, num_hyperlinks),
+            device=self.device
+        ).to_dense()
+
+        iX = X
+        jX = X.clone()
+        iAX = torch.matmul(H.T, X)
+        jAX = iAX.clone()
+        I = H.clone()
+
+        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
+        epochs = kwargs.get("epochs", 50)
+
+        for _ in range(epochs):
+            optimizer.zero_grad()
+            data = {
+                'iX': (iX,),
+                'iAX': (iAX,),
+                'jX': (jX,),
+                'jAX': (jAX,),
+                'I': (I,)
+            }
+            scores = self.forward(data)
+            metrics = self.metrics(scores)
+            loss = metrics['loss']
+            loss.backward()
+            optimizer.step()
+
+        self.H = H.detach()
+        self.X = X.detach()
+        return self
+
+    def predict(self, edge_index: Tensor):
+        if self.H is None or self.X is None:
+            # Lazy fit: the pipeline sets self.X externally before calling
+            # predict(); without X (or edges) there is nothing to fit on.
+            if self.X is None or edge_index is None:
+                raise ValueError("Model not fitted. Call fit() first.")
+            self.fit(self.X, None, edge_index)
+
+        H = self.H.to(self.device)
+        X = self.X
+
+        iX = X
+        jX = X.clone()
+        iAX = torch.matmul(H.T, X)
+        jAX = iAX.clone()
+        I = H.clone()
+
+        data = {
+            'iX': (iX,),
+            'iAX': (iAX,),
+            'jX': (jX,),
+            'jAX': (jAX,),
+            'I': (I,)
+        }
+
+        self.eval()
+        with torch.no_grad():
+            scores = self.forward(data, test=True)
+            S = scores['S'].cpu()
+            S_ = scores['S_'].cpu()
+
+        new_edges = torch.nonzero(torch.triu(torch.matmul(H, H.T), diagonal=1)).T
+
+        return HyperlinkPredictionResult(
+            edge_index=new_edges,
+            device=self.device
+        )
+
+    def forward(self, data, test=False):
+        iX = self._get(data, 'iX', test=test)
+        jX = self._get(data, 'jX', test=test)
+        I = self._get(data, 'I', test=test)
+
+        iAX = self._get(data, 'iAX', test=test)
+        jAX = self._get(data, 'jAX', test=test)
+
+        iX_proj = self.loop(iX)
+        jX_proj = self.loop(jX)
+
+        iAX_proj = self.GCN(iAX)
+        jAX_proj = self.GCN(jAX)
+
+        iAX_agg = torch.matmul(I, iAX_proj)
+        jAX_agg = torch.matmul(I, jAX_proj)
+
+        iH = iX_proj + iAX_agg
+        jH = jX_proj + jAX_agg
+
+        # Scoring
+        if self.score == 'mean':
+            IH, JH = self._mean(I, iH), self._mean(I, jH)
+        elif self.score == 'maxmin':
+            IH, JH = self._maxmin(I, iH), self._maxmin(I, jH)
+
+        S = torch.sigmoid(self.INT(IH))
+        S_ = torch.sigmoid(self.INT(JH))
+
+        D = {"S": S, "S_": S_}
+
+        if self.Type == "d":
+            IHp = self._mean((I == 1).float(), iH)
+            IHn = self._mean((I == -1).float(), iH)
+            Sb = torch.mm(self.BL(IHp), IHn.t()).diagonal().unsqueeze(1)
+
+            JHp = self._mean((I == 1).float(), jH)
+            JHn = self._mean((I == -1).float(), jH)
+            Sb_ = torch.mm(self.BL(JHp), JHn.t()).diagonal().unsqueeze(1)
+
+            D['Sb'], D['Sb_'] = Sb, Sb_
+
+        return D
+
+    def _mean(self, K, H):
+        L = K * K
+        L = L / torch.sum(L, dim=0, keepdim=True)
+        return torch.mm(L.t(), H)
+
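+    # Max-min pooling from the NHP paper: for each hyperlink, take the
+    # element-wise max over its member-node embeddings minus the element-wise
+    # min, clamped at zero. Non-members are masked out: (L == 1) keeps members
+    # for the max, and the (LB == 0) * 1e4 offset pushes non-members out of
+    # the min. (A reading of the tensor reshaping below, not an official spec.)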
+    def _maxmin(self, K, H):
+        L = K.t()
+        B = H.repeat(L.size()[0], 1, 1)
+        d = B.size()[-1]
+        L = L.repeat_interleave(d).view(L.size()[0], L.size()[1], d)
+        LB = (L == 1).float() * B
+        M = torch.max(LB, dim=1)[0]
+        if self.Type == "d":
+            LB = (L == -1).float() * B
+        m = torch.min((LB == 0).float() * 1e4 + LB, dim=1)[0]
+        return (M - m) * ((M - m > 0).float())
+
+    def metrics(self, scores):
+        S, S_ = scores['S'], scores['S_']
+        M = torch.sum(S_) / len(S_)
+        loss = torch.sum(torch.log(1 + torch.exp(M - S)))
+        if 'Sb' in scores:
+            Sb, Sb_ = scores['Sb'], scores['Sb_']
+            Mb = torch.sum(Sb_) / len(Sb_)
+            loss += self.lam * torch.sum(torch.log(1 + torch.exp(Mb - Sb)))
+        return {'loss': loss}
+
+    def _get(self, data, k, test=False):
+        # Previously this forced CPU at test time, which breaks when the
+        # model lives on another device; keep everything on self.device.
+        return data[k][0].to(self.device)
+
+class FactorizationMachine(HyperlinkPredictor):
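+    """Degree-2 factorization machine scorer for hyperlinks.
+
+    Based on the libFM model:
+    https://github.com/srendle/libfm/blob/master/src/fm_core/fm_model.h
+
+    Sketch of documentation inferred from the code below: w0 is the global
+    bias, w the per-feature linear weights, and V the (num_features x
+    num_factors) factor matrix. Note that fit() only (re)initialises the
+    parameters; no training loop is implemented yet, and reg_lambda is
+    currently unused.
+    """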
+    def __init__(self, num_features=None, num_factors=10, reg_lambda=0.0, device='cpu'):
+        super().__init__(device)
+        self.num_features = num_features
+        self.num_factors = num_factors
+        self.reg_lambda = reg_lambda
+
+        self.w0 = nn.Parameter(torch.zeros(1))
+        self.w = None
+        self.V = None
+        self.fitted = False
+
+    def fit(self, X, y, edge_index, *args, **kwargs):
+        if X is None:
+            self.num_features = int(edge_index.max().item() + 1)
+        else:
+            _, self.num_features = X.shape
+
+        self.w = nn.Parameter(torch.zeros(self.num_features, device=self.device))
+        self.V = nn.Parameter(
+            torch.randn(self.num_features, self.num_factors, device=self.device) * 0.01
+        )
+
+        self.fitted = True
+        return self
+
+    def predict(self, edge_index: Tensor):
+        if not self.fitted:
+            num_nodes = int(edge_index[0].max()) + 1
+            dummy_X = torch.zeros((1, num_nodes), device=self.device)
+            dummy_y = torch.zeros((1,), device=self.device)
+            self.fit(dummy_X, dummy_y, edge_index)
+
+        X = torch.sparse_coo_tensor(
+            edge_index,
+            torch.ones(edge_index.shape[1], device=self.device),
+            (self.num_features, edge_index.shape[1]),
+            device=self.device
+        ).to_dense().T
+
+        preds = self._predict_tensor(X)
+
+        # preds is indexed by hyperlink id, while edge_index columns are
+        # (node, hyperlink) incidences: expand the mask through edge_index[1]
+        # so that every incidence of a positively scored hyperlink is kept.
+        positive_mask = preds.squeeze() > 0
+        pos_edge_index = edge_index[:, positive_mask[edge_index[1]]]
+
+        return HyperlinkPredictionResult(
+            edge_index=pos_edge_index,
+            device=self.device
+        )
+
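+    # Rendle's O(k*N) reformulation of the pairwise FM term (a standard
+    # identity, restated here for readability):
+    #   y(x) = w0 + w^T x
+    #        + 0.5 * sum_f [ (sum_i V[i,f] * x_i)^2 - sum_i V[i,f]^2 * x_i^2 ]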
+    def _predict_tensor(self, X: Tensor) -> Tensor:
+        linear_part = torch.matmul(X, self.w) + self.w0
+
+        XV = torch.matmul(X, self.V)
+        XV_square = XV.pow(2).sum(dim=1)
+        X_square_V_square = torch.matmul(X.pow(2), self.V.pow(2)).sum(dim=1)
+        interactions = 0.5 * (XV_square - X_square_V_square)
+
+        return linear_part + interactions

diff --git a/hyperbench/hyperlink_prediction/models/hyperlink_prediction_result.py b/hyperbench/hyperlink_prediction/models/hyperlink_prediction_result.py
index a20310c..a4a92ec 100644
--- a/hyperbench/hyperlink_prediction/models/hyperlink_prediction_result.py
+++ b/hyperbench/hyperlink_prediction/models/hyperlink_prediction_result.py
@@ -4,7 +4,7 @@
 
 class HyperlinkPredictionResult(ABC):
-    def __init__(self, 
+    def __init__(self,
                  edge_index: Tensor,
                  device="cpu"):
         self.device = device
@@ -12,17 +12,17 @@ def __init__(self,
 
         _, self.__edge_index[1] = torch.unique(self.__edge_index[1], return_inverse=True)
 
-    @property
-    def edge_index(self) -> Tensor:
-        return self.__edge_index
+    @property
+    def edge_index(self) -> Tensor:
+        return self.__edge_index
 
-    @property
-    def num_edges(self):
-        return torch.unique(self.__edge_index[1]).shape[0]
+    @property
+    def num_edges(self):
+        return torch.unique(self.__edge_index[1]).shape[0]
 
-    @property
-    def y(self) -> Tensor:
-        return torch.ones((self.num_edges, 1), device=self.device)
+    @property
+    def y(self) -> Tensor:
+        return torch.ones((self.edge_index.size(1), 1), device=self.device)
 
-    def __repr__(self):
-        return self.edge_index.__repr__()
+    def __repr__(self):
+        return self.edge_index.__repr__()

diff --git a/hyperbench/pipelines/pipeline.py b/hyperbench/pipelines/pipeline.py
index 1dd0dd5..5bea8b1 100644
--- a/hyperbench/pipelines/pipeline.py
+++ b/hyperbench/pipelines/pipeline.py
@@ -4,13 +4,17 @@ def execute():
     parser = argparse.ArgumentParser(description="Insert dataset_name, insert negative_sampling method")
     parser.add_argument('--dataset_name', type=str, help="The dataset's name, possible dataset's name: \nIMDB,\nCOURSERA,\nARXIV", required=True)
     parser.add_argument('--negative_sampling', type=str, help="negative sampling method to use, possible methods: \n SizedHypergraphNegativeSampler,\nMotifHypergraphNegativeSampler,\nCliqueHypergraphNegativeSampler", required=True)
-    parser.add_argument('--hlp_method', type=str, help="hyperlink prediction method to use, possible method: \nCommonNeighbors", required=True)
+    parser.add_argument('--alpha', type=float, help="first parameter for the method SizedHypergraphNegativeSampler", default=0.5)
+    parser.add_argument('--beta', type=int, help="second parameter for the method SizedHypergraphNegativeSampler", default=1)
+    parser.add_argument('--hlp_method', type=str, help="hyperlink prediction method to use, possible methods: \nCommonNeighbors,\nNeuralHP,\nFactorizationMachine", required=True)
     parser.add_argument('--output_path', type=str, help="Path to save the results", default="./results")
     parser.add_argument('--random_seed', type=int, help="Random seed for reproducibility", default=None)
     parser.add_argument('--test', type=bool, help="If true, runs in test mode", default=False)
     args = parser.parse_args()
     dataset_name= args.dataset_name
     negative_method = args.negative_sampling
+    alpha = args.alpha
+    beta = args.beta
     hlp_method = args.hlp_method
     output_path = args.output_path
     random_seed = args.random_seed
@@ -24,8 +28,7 @@ def execute():
     import time
     from random import randint, seed
     from ..hyperlink_prediction.loader.dataloader import DatasetLoader
-    from ..hyperlink_prediction.models.hyperlink_prediction_algorithm import CommonNeighbors
-    from ..utils.data_and_sampling_selector import setNegativeSamplingAlgorithm, select_dataset
+    from ..utils.data_and_sampling_selector import setNegativeSamplingAlgorithm, select_dataset, setHyperlinkPredictionAlgorithm
     from ..utils.hyperlink_train_test_split import train_test_split
     from torch_geometric.nn import HypergraphConv
     from tqdm.auto import trange, tqdm
@@ -82,7 +85,9 @@ def pre_transform(data: HyperGraphData):
 
     loader = DatasetLoader(
         train_dataset,
-        negative_method,
+        negative_method,
+        alpha,
+        beta,
         dataset._data.num_nodes,
         batch_size=4000,
         shuffle=True,
@@ -160,7 +165,7 @@ def forward(self, x, x_e, edge_index):
 
     criterion = torch.nn.BCEWithLogitsLoss()
     test_criterion = torch.nn.BCELoss()
-    negative_hypergraph = setNegativeSamplingAlgorithm(negative_method, test_dataset._data.num_nodes).generate(test_dataset._data.edge_index)
+    negative_hypergraph = setNegativeSamplingAlgorithm(negative_method, test_dataset._data.num_nodes, alpha, beta).generate(test_dataset._data.edge_index)
     edge_index_test = test_dataset._data.edge_index.clone()
     test_dataset.y = torch.vstack((
         torch.ones((test_dataset._data.edge_index[1].max() + 1, 1)),
@@ -174,16 +179,16 @@ def forward(self, x, x_e, edge_index):
         y = test_dataset.y,
         num_nodes = test_dataset._data.num_nodes
     )
+    hlp_method = setHyperlinkPredictionAlgorithm(hlp_method)
 
     for epoch in trange(150):
         model.train()
        optimizer.zero_grad()
         for i, h in tqdm(enumerate(loader), leave = False):
-            h = h
-            negative_sampler = setNegativeSamplingAlgorithm(negative_method, h.num_nodes)
+            negative_sampler = setNegativeSamplingAlgorithm(negative_method, h.num_nodes, alpha, beta)
             negative_test = negative_sampler.generate(h.edge_index)
-
-            hlp_method = CommonNeighbors(h.num_nodes)
+            hlp_method.X = h.x
             hlp_result = hlp_method.predict(negative_test.edge_index)
             y_pos = torch.ones(hlp_result.edge_index.size(1), 1)

diff --git a/hyperbench/utils/__init__.py b/hyperbench/utils/__init__.py
index 19cfe99..200795f 100644
--- a/hyperbench/utils/__init__.py
+++ b/hyperbench/utils/__init__.py
@@ -1,7 +1,8 @@
 from .hyperlink_train_test_split import train_test_split
-from .data_and_sampling_selector import setNegativeSamplingAlgorithm, select_dataset
+from .data_and_sampling_selector import setNegativeSamplingAlgorithm, select_dataset, setHyperlinkPredictionAlgorithm
 
 __all__ = [
     'train_test_split',
     'setNegativeSamplingAlgorithm',
+    'setHyperlinkPredictionAlgorithm',
     'select_dataset'
 ]
\ No newline at end of file

diff --git a/hyperbench/utils/data_and_sampling_selector.py b/hyperbench/utils/data_and_sampling_selector.py
index 2732002..d884dec 100644
--- a/hyperbench/utils/data_and_sampling_selector.py
+++ b/hyperbench/utils/data_and_sampling_selector.py
@@ -1,11 +1,12 @@
 from ..hyperlink_prediction.datasets import ARBDataset, IMDBHypergraphDataset, ARXIVHypergraphDataset, COURSERAHypergraphDataset, CHLPBaseDataset
 from ..negative_sampling.hypergraph_negative_sampling_algorithm import SizedHypergraphNegativeSampler, MotifHypergraphNegativeSampler, CliqueHypergraphNegativeSampler, HypergraphNegativeSampler
+from ..hyperlink_prediction.models import CommonNeighbors, NeuralHP, FactorizationMachine, HyperlinkPredictor
 
-def setNegativeSamplingAlgorithm(ns_algorithm: str, num_node: int):
+def setNegativeSamplingAlgorithm(ns_algorithm: str, num_node: int, alpha: float, beta: int):
     ns_method : HypergraphNegativeSampler
     match(ns_algorithm):
         case 'SizedHypergraphNegativeSampler':
-            ns_method = SizedHypergraphNegativeSampler(num_node)
+            ns_method = SizedHypergraphNegativeSampler(num_node, alpha=alpha, beta=beta)
         case 'MotifHypergraphNegativeSampler':
             ns_method = MotifHypergraphNegativeSampler(num_node)
         case 'CliqueHypergraphNegativeSampler':
@@ -13,6 +14,17 @@ def setNegativeSamplingAlgorithm(ns_algorithm: str, num_node: int):
 
     return ns_method
 
+def setHyperlinkPredictionAlgorithm(hlp_algorithm: str):
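+    """Map a method name from the CLI to a predictor instance.
+
+    Sketch of documentation for the new selector: accepts 'CommonNeighbors',
+    'NeuralHP' or 'FactorizationMachine' (all constructed with their default
+    arguments) and raises ValueError for anything else.
+    """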
+    hlp_method: HyperlinkPredictor
+    match(hlp_algorithm):
+        case 'CommonNeighbors':
+            hlp_method = CommonNeighbors()
+        case 'NeuralHP':
+            hlp_method = NeuralHP()
+        case 'FactorizationMachine':
+            hlp_method = FactorizationMachine()
+        case _:
+            # Fail fast on unknown names instead of hitting an
+            # UnboundLocalError on the return below.
+            raise ValueError(f"Unknown hyperlink prediction method: {hlp_algorithm}")
+    return hlp_method
+
 def select_dataset(ds: str, pre_transform):
 
     dataset : ARBDataset

diff --git a/tests/methods_test.py b/tests/methods_test.py
index dbae1dc..c640f7f 100644
--- a/tests/methods_test.py
+++ b/tests/methods_test.py
@@ -4,13 +4,13 @@ def dataset_dict():
     datasets = {}
     dataset_arb = [
-        "coauth-DBLP",
-        "coauth-MAG-Geology",
+        #"coauth-DBLP",
+        #"coauth-MAG-Geology",
         "email-Enron",
-        "tags-math-sx",
+        #"tags-math-sx",
         "contact-high-school",
-        "contact-primary-school",
-        "NDC-substances"
+        #"contact-primary-school",
+        #"NDC-substances"
     ]
     datasets_CHLP = [
         "IMDB",
@@ -22,7 +22,11 @@ def dataset_dict():
         "MotifHypergraphNegativeSampler",
         "CliqueHypergraphNegativeSampler"
     ]
-    hlp_methods = ["CommonNeighbors"]
+    hlp_methods = [
+        "CommonNeighbors",
+        "NeuralHP",
+        "FactorizationMachine"
+    ]
 
     ns_hlp_union = []
     for ns in negative_methods:

diff --git a/uv.lock b/uv.lock
index c6f2240..2d6e715 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 2
+revision = 3
 requires-python = ">=3.9"
 resolution-markers = [
     "python_full_version >= '3.12'",
@@ -790,7 +790,7 @@ wheels = [
 ]
 
 [[package]]
-name = "hypernegative"
+name = "hyperbench"
 version = "0.1.2.7"
 source = { editable = "." }
 dependencies = [