diff --git a/README.md b/README.md index 556325a..eb9da44 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,12 @@ # HyperBench -[![Contributors][contributors-shield]][contributors-url] [![Forks][forks-shield]][forks-url] [![Stargazers][stars-shield]][stars-url] +[![Contributors][contributors-shield]][contributors-url] + [![Issues][issues-shield]][issues-url] [![project_license][license-shield]][license-url] + [![codecov](https://codecov.io/github/hypernetwork-research-group/hyperbench/graph/badge.svg?token=XE0TB5JMOS)](https://codecov.io/github/hypernetwork-research-group/hyperbench) ## About the project @@ -95,3 +97,4 @@ WIP [issues-url]: https://github.com/hypernetwork-research-group/hyperbench/issues [license-shield]: https://img.shields.io/github/license/hypernetwork-research-group/hyperbench.svg?style=for-the-badge [license-url]: https://github.com/hypernetwork-research-group/hyperbench/blob/master/LICENSE.txt +[docs]: https://hypernetwork-research-group.github.io/hyperbench/ diff --git a/hyperbench/data/dataset.py b/hyperbench/data/dataset.py index c4ba015..e2a2172 100644 --- a/hyperbench/data/dataset.py +++ b/hyperbench/data/dataset.py @@ -1,12 +1,12 @@ import json import os -import gdown import tempfile import torch import zstandard as zstd +import requests from enum import Enum -from typing import Any, Dict, List, Tuple +from typing import Any, Dict, List, Optional, Tuple, Union from torch import Tensor from torch.utils.data import Dataset as TorchDataset from hyperbench.types.hypergraph import HIFHypergraph @@ -19,11 +19,24 @@ class DatasetNames(Enum): Enumeration of available datasets. """ - ALGEBRA = "1" - EMAIL_ENRON = "2" - ARXIV = "3" - DBLP = "4" - THREADSMATHSX = "5" + ALGEBRA = "algebra" + AMAZON = "amazon" + CONTACT_HIGH_SCHOOL = "contact-high-school" + CONTACT_PRIMARY_SCHOOL = "contact-primary-school" + DBLP = "dblp" + EMAIL_ENRON = "email-Enron" + EMAIL_W3C = "email-W3C" + GEOMETRY = "geometry" + GOT = "got" + MUSIC_BLUES_REVIEWS = "music-blues-reviews" + NBA = "nba" + NDC_CLASSES = "NDC-classes" + NDC_SUBSTANCES = "NDC-substances" + RESTAURANT_REVIEWS = "restaurant-reviews" + THREADS_ASK_UBUNTU = "threads-ask-ubuntu" + THREADS_MATH_SX = "threads-math-sx" + TWITTER = "twitter" + VEGAS_BARS_REVIEWS = "vegas-bars-reviews" class HIFConverter: @@ -33,38 +46,51 @@ class HIFConverter: """ @staticmethod - def load_from_hif(dataset_name: str | None, file_id: str | None) -> HIFHypergraph: - if dataset_name is None or file_id is None: + def load_from_hif( + dataset_name: Optional[str], save_on_disk: bool = False + ) -> HIFHypergraph: + if dataset_name is None: raise ValueError( - f"Dataset name (provided: {dataset_name}) and file ID (provided: {file_id}) must be provided." + f"Dataset name (provided: {dataset_name}) must be provided." ) if dataset_name not in DatasetNames.__members__: raise ValueError(f"Dataset '{dataset_name}' not found.") - dataset_name_lower = dataset_name.lower() + dataset_name = DatasetNames[dataset_name].value current_dir = os.path.dirname(os.path.abspath(__file__)) - zst_filename = os.path.join( - current_dir, "datasets", f"{dataset_name_lower}.json.zst" - ) + zst_filename = os.path.join(current_dir, "datasets", f"{dataset_name}.json.zst") - if os.path.exists(zst_filename): - dctx = zstd.ZstdDecompressor() - with ( - open(zst_filename, "rb") as input_f, - tempfile.NamedTemporaryFile( - mode="wb", suffix=".json", delete=False - ) as tmp_file, - ): - dctx.copy_stream(input_f, tmp_file) - output = tmp_file.name - else: - url = f"https://drive.google.com/uc?id={file_id}" + if not os.path.exists(zst_filename): + github_dataset_repo = f"https://github.com/hypernetwork-research-group/datasets/blob/main/{dataset_name}.json.zst?raw=true" + + response = requests.get(github_dataset_repo) + if response.status_code != 200: + raise ValueError( + f"Failed to download dataset '{dataset_name}' from GitHub. Status code: {response.status_code}" + ) - with tempfile.NamedTemporaryFile( - mode="w+", suffix=".json", delete=False - ) as tmp_file: - output = tmp_file.name - gdown.download(url=url, output=output, quiet=False, fuzzy=True) + if save_on_disk: + os.makedirs(os.path.join(current_dir, "datasets"), exist_ok=True) + with open(zst_filename, "wb") as f: + f.write(response.content) + else: + # Create temporary file for downloaded zst content + with tempfile.NamedTemporaryFile( + mode="wb", suffix=".json.zst", delete=False + ) as tmp_zst_file: + tmp_zst_file.write(response.content) + zst_filename = tmp_zst_file.name + + # Decompress the downloaded zst file + dctx = zstd.ZstdDecompressor() + with ( + open(zst_filename, "rb") as input_f, + tempfile.NamedTemporaryFile( + mode="wb", suffix=".json", delete=False + ) as tmp_file, + ): + dctx.copy_stream(input_f, tmp_file) + output = tmp_file.name with open(output, "r") as f: hiftext = json.load(f) @@ -79,7 +105,6 @@ class Dataset(TorchDataset): """ Base Dataset class for hypergraph datasets, extending PyTorch's Dataset. Attributes: - GDRIVE_FILE_ID (str): Google Drive file ID for the dataset. DATASET_NAME (str): Name of the dataset. hypergraph (HIFHypergraph): Loaded hypergraph instance. Methods: @@ -87,7 +112,6 @@ class Dataset(TorchDataset): process(): Processes the hypergraph into HData format. """ - GDRIVE_FILE_ID = None DATASET_NAME = None def __init__(self) -> None: @@ -129,7 +153,7 @@ def download(self) -> HIFHypergraph: """ if hasattr(self, "hypergraph") and self.hypergraph is not None: return self.hypergraph - hypergraph = HIFConverter.load_from_hif(self.DATASET_NAME, self.GDRIVE_FILE_ID) + hypergraph = HIFConverter.load_from_hif(self.DATASET_NAME) return hypergraph def process(self) -> HData: @@ -211,17 +235,17 @@ def process(self) -> HData: return HData(x, edge_index, edge_attr, num_nodes, num_edges) def transform_node_attrs( - self, attrs: Dict[str, Any], attr_keys: List[str] | None = None + self, attrs: Dict[str, Any], attr_keys: Optional[List[str]] = None ) -> Tensor: return self.transform_attrs(attrs, attr_keys) def transform_edge_attrs( - self, attrs: Dict[str, Any], attr_keys: List[str] | None = None + self, attrs: Dict[str, Any], attr_keys: Optional[List[str]] = None ) -> Tensor: return self.transform_attrs(attrs, attr_keys) def transform_attrs( - self, attrs: Dict[str, Any], attr_keys: List[str] | None = None + self, attrs: Dict[str, Any], attr_keys: Optional[List[str]] = None ) -> Tensor: """ Extract and encode numeric node attributes to tensor. @@ -268,9 +292,6 @@ def __collect_attr_keys(self, attr_keys: List[Dict[str, Any]]) -> List[str]: return unique_keys def __get_node_ids_to_sample(self, id: int | List[int]) -> List[int]: - if isinstance(id, int): - return [id] - if isinstance(id, list): if len(id) < 1: raise ValueError("Index list cannot be empty.") @@ -280,6 +301,8 @@ def __get_node_ids_to_sample(self, id: int | List[int]) -> List[int]: ) return list(set(id)) + return [id] + def __validate_node_ids(self, node_ids: List[int]) -> None: for id in node_ids: if id < 0 or id >= self.__len__(): @@ -386,14 +409,23 @@ def __to_0based_ids( class AlgebraDataset(Dataset): DATASET_NAME = "ALGEBRA" - GDRIVE_FILE_ID = "1-H21_mZTcbbae4U_yM3xzXX19VhbCZ9C" class DBLPDataset(Dataset): DATASET_NAME = "DBLP" - GDRIVE_FILE_ID = "1oiXQWdybAAUvhiYbFY1R9Qd0jliMSSQh" class ThreadsMathsxDataset(Dataset): DATASET_NAME = "THREADSMATHSX" - GDRIVE_FILE_ID = "1jS4FDs7ME-mENV6AJwCOb_glXKMT7YLQ" + + +if __name__ == "__main__": + for dataset in DatasetNames: + print(f"Processing dataset: {dataset.value}") + if dataset == DatasetNames.EMAIL_ENRON: + load_hif = HIFConverter.load_from_hif(dataset.name, save_on_disk=True) + continue + load_hif = HIFConverter.load_from_hif(dataset.name) + print( + f"Loaded HIF hypergraph with {len(load_hif.nodes)} nodes and {len(load_hif.edges)} edges." + ) diff --git a/hyperbench/tests/data/dataset_test.py b/hyperbench/tests/data/dataset_test.py index c41b040..0cead09 100644 --- a/hyperbench/tests/data/dataset_test.py +++ b/hyperbench/tests/data/dataset_test.py @@ -1,3 +1,4 @@ +import requests import torch import pytest from unittest.mock import patch, mock_open @@ -10,7 +11,7 @@ # Reusable fixture for hypergraph instances used in multiple tests @pytest.fixture -def sample_hypergraph(): +def mock_sample_hypergraph(): return HIFHypergraph( network_type="undirected", nodes=[{"node": "0"}, {"node": "1"}], @@ -20,7 +21,7 @@ def sample_hypergraph(): @pytest.fixture -def simple_mock_hypergraph(): +def mock_simple_hypergraph(): """Simple hypergraph with 2 nodes for basic tests.""" return HIFHypergraph( network_type="undirected", @@ -31,22 +32,7 @@ def simple_mock_hypergraph(): @pytest.fixture -def three_node_mock_hypergraph(): - """Hypergraph with 3 nodes for validation tests.""" - return HIFHypergraph( - network_type="undirected", - nodes=[ - {"node": "0", "attrs": {}}, - {"node": "1", "attrs": {}}, - {"node": "2", "attrs": {}}, - ], - edges=[{"edge": "0", "attrs": {}}], - incidences=[{"node": "0", "edge": "0"}], - ) - - -@pytest.fixture -def three_node_mock_weighted_hypergraph(): +def mock_three_node_weighted_hypergraph(): return HIFHypergraph( network_type="undirected", nodes=[ @@ -67,7 +53,7 @@ def three_node_mock_weighted_hypergraph(): @pytest.fixture -def four_node_mock_hypergraph(): +def mock_four_node_hypergraph(): """Hypergraph with 4 nodes and 2 edges for sampling tests.""" return HIFHypergraph( network_type="undirected", @@ -88,7 +74,7 @@ def four_node_mock_hypergraph(): @pytest.fixture -def five_node_mock_hypergraph(): +def mock_five_node_hypergraph(): """Hypergraph with 5 nodes for duplicate testing.""" return HIFHypergraph( network_type="undirected", @@ -105,7 +91,7 @@ def five_node_mock_hypergraph(): @pytest.fixture -def no_edge_attr_mock_hypergraph(): +def mock_no_edge_attr_hypergraph(): return HIFHypergraph( network_type="undirected", nodes=[ @@ -121,7 +107,7 @@ def no_edge_attr_mock_hypergraph(): @pytest.fixture -def multiple_edges_attr_mock_hypergraph(): +def mock_multiple_edges_attr_hypergraph(): return HIFHypergraph( network_type="undirected", nodes=[ @@ -144,19 +130,18 @@ def multiple_edges_attr_mock_hypergraph(): ) -def test_fixture(sample_hypergraph): - assert sample_hypergraph.network_type == "undirected" - assert len(sample_hypergraph.nodes) == 2 - assert len(sample_hypergraph.edges) == 1 - assert len(sample_hypergraph.incidences) == 1 +def test_fixture(mock_sample_hypergraph): + assert mock_sample_hypergraph.network_type == "undirected" + assert len(mock_sample_hypergraph.nodes) == 2 + assert len(mock_sample_hypergraph.edges) == 1 + assert len(mock_sample_hypergraph.incidences) == 1 def test_HIFConverter(): """Test loading a known HIF dataset using HIFConverter.""" dataset_name = "ALGEBRA" - file_id = "1-H21_mZTcbbae4U_yM3xzXX19VhbCZ9C" - hypergraph = HIFConverter.load_from_hif(dataset_name, file_id) + hypergraph = HIFConverter.load_from_hif(dataset_name) assert hypergraph is not None assert hasattr(hypergraph, "nodes") @@ -172,35 +157,171 @@ def test_HIFConverter(): def test_HIFConverter_invalid_dataset(): """Test loading an invalid dataset""" dataset_name = "INVALID_DATASET" - file_id = "invalid_file_id" with pytest.raises(ValueError, match="Dataset 'INVALID_DATASET' not found"): - HIFConverter.load_from_hif(dataset_name, file_id) + HIFConverter.load_from_hif(dataset_name) def test_HIFConverter_invalid_hif_format(): """Test loading an invalid HIF format dataset.""" - dataset_name = "EMAIL_ENRON" - file_id = "test_file_id" + dataset_name = "ALGEBRA" invalid_hif_json = '{"network-type": "undirected", "nodes": []}' with ( - patch("hyperbench.data.dataset.gdown.download") as mock_download, - patch("builtins.open", mock_open(read_data=invalid_hif_json)), + patch("hyperbench.data.dataset.requests.get") as mock_get, patch("hyperbench.data.dataset.validate_hif_json", return_value=False), + patch("builtins.open", mock_open(read_data=invalid_hif_json)), + patch("hyperbench.data.dataset.zstd.ZstdDecompressor"), ): + mock_response = mock_get.return_value + mock_response.status_code = 200 + mock_response.content = b"mock_zst_content" + + with pytest.raises(ValueError, match="Dataset 'algebra' is not HIF-compliant"): + HIFConverter.load_from_hif(dataset_name) + + +def test_HIFConverter_save_on_disk(): + """Test downloading dataset with save_on_disk=True.""" + dataset_name = "ALGEBRA" + + mock_hypergraph = HIFHypergraph( + network_type="undirected", + nodes=[{"node": "0"}, {"node": "1"}], + edges=[{"edge": "0"}], + incidences=[{"node": "0", "edge": "0"}], + ) + + mock_hif_json = { + "network-type": "undirected", + "nodes": [{"node": "0"}, {"node": "1"}], + "edges": [{"edge": "0"}], + "incidences": [{"node": "0", "edge": "0"}], + } + + with ( + patch("hyperbench.data.dataset.requests.get") as mock_get, + patch("hyperbench.data.dataset.os.path.exists", return_value=False), + patch("hyperbench.data.dataset.os.makedirs"), + patch("builtins.open", mock_open()) as mock_file, + patch("hyperbench.data.dataset.zstd.ZstdDecompressor") as mock_decomp, + patch("hyperbench.data.dataset.json.load", return_value=mock_hif_json), + patch("hyperbench.data.dataset.validate_hif_json", return_value=True), + patch.object(HIFHypergraph, "from_hif", return_value=mock_hypergraph), + ): + # Mock successful download + mock_response = mock_get.return_value + mock_response.status_code = 200 + mock_response.content = b"mock_zst_content" + + # Mock decompressor + mock_stream = mock_decomp.return_value.stream_reader.return_value + mock_stream.__enter__ = lambda self: mock_stream + mock_stream.__exit__ = lambda self, *args: None + + hypergraph = HIFConverter.load_from_hif(dataset_name, save_on_disk=True) + + assert hypergraph is not None + assert hypergraph.network_type == "undirected" + mock_get.assert_called_once() + # Verify file was written to disk (not temp file) + assert mock_file.call_count >= 2 # Once for write, once for read + + +def test_HIFConverter_temp_file(): + """Test downloading dataset with save_on_disk=False (uses temp file).""" + dataset_name = "ALGEBRA" + + mock_hypergraph = HIFHypergraph( + network_type="undirected", + nodes=[{"node": "0"}, {"node": "1"}], + edges=[{"edge": "0"}], + incidences=[{"node": "0", "edge": "0"}], + ) + + mock_hif_json = { + "network-type": "undirected", + "nodes": [{"node": "0"}, {"node": "1"}], + "edges": [{"edge": "0"}], + "incidences": [{"node": "0", "edge": "0"}], + } + + with ( + patch("hyperbench.data.dataset.requests.get") as mock_get, + patch("hyperbench.data.dataset.os.path.exists", return_value=False), + patch("hyperbench.data.dataset.tempfile.NamedTemporaryFile") as mock_temp, + patch("builtins.open", mock_open()), + patch("hyperbench.data.dataset.zstd.ZstdDecompressor") as mock_decomp, + patch("hyperbench.data.dataset.json.load", return_value=mock_hif_json), + patch("hyperbench.data.dataset.validate_hif_json", return_value=True), + patch.object(HIFHypergraph, "from_hif", return_value=mock_hypergraph), + ): + # Mock successful download + mock_response = mock_get.return_value + mock_response.status_code = 200 + mock_response.content = b"mock_zst_content" + + # Mock temp file + mock_temp_file = mock_temp.return_value.__enter__.return_value + mock_temp_file.name = "/tmp/fake_temp.json.zst" + + # Mock decompressor + mock_stream = mock_decomp.return_value.stream_reader.return_value + mock_stream.__enter__ = lambda self: mock_stream + mock_stream.__exit__ = lambda self, *args: None + + hypergraph = HIFConverter.load_from_hif(dataset_name, save_on_disk=False) + + assert hypergraph is not None + assert hypergraph.network_type == "undirected" + mock_get.assert_called_once() + # Verify temp file was used + assert mock_temp.call_count >= 1 + + +def test_HIFConverter_download_failure(): + """Test handling of failed download from GitHub.""" + dataset_name = "ALGEBRA" + + with ( + patch("hyperbench.data.dataset.requests.get") as mock_get, + patch("hyperbench.data.dataset.os.path.exists", return_value=False), + ): + # Mock failed download + mock_response = mock_get.return_value + mock_response.status_code = 404 + with pytest.raises( - ValueError, match="Dataset 'EMAIL_ENRON' is not HIF-compliant" + ValueError, + match=r"Failed to download dataset 'algebra' from GitHub\. Status code: 404", ): - HIFConverter.load_from_hif(dataset_name, file_id) + HIFConverter.load_from_hif(dataset_name) + + mock_get.assert_called_once_with( + "https://github.com/hypernetwork-research-group/datasets/blob/main/algebra.json.zst?raw=true" + ) + + +def test_HIFConverter_network_error(): + """Test handling of network errors during download.""" + dataset_name = "ALGEBRA" + + with ( + patch("hyperbench.data.dataset.requests.get") as mock_get, + patch("hyperbench.data.dataset.os.path.exists", return_value=False), + ): + # Mock network error + mock_get.side_effect = requests.RequestException("Network error") + + with pytest.raises(requests.RequestException, match="Network error"): + HIFConverter.load_from_hif(dataset_name) def test_dataset_not_available(): """Test loading an unavailable dataset.""" class FakeMockDataset(Dataset): - GDRIVE_FILE_ID = "fake_id" DATASET_NAME = "FAKE" with pytest.raises(ValueError, match=r"Dataset 'FAKE' not found"): @@ -220,7 +341,6 @@ def test_AlgebraDataset_available(): with patch.object(HIFConverter, "load_from_hif", return_value=mock_hypergraph): dataset = AlgebraDataset() - assert dataset.GDRIVE_FILE_ID == "1-H21_mZTcbbae4U_yM3xzXX19VhbCZ9C" assert dataset.DATASET_NAME == "ALGEBRA" assert dataset.hypergraph is not None assert dataset.__len__() == dataset.hypergraph.num_nodes @@ -246,12 +366,11 @@ def test_dataset_name_none(): """Test that ValueError is raised if DATASET_NAME is None.""" class FakeMockDataset(Dataset): - GDRIVE_FILE_ID = "fake_id" DATASET_NAME = None with pytest.raises( ValueError, - match=r"Dataset name \(provided: None\) and file ID \(provided: fake_id\) must be provided\.", + match=r"Dataset name \(provided: None\) must be provided\.", ): FakeMockDataset() @@ -312,11 +431,11 @@ def test_dataset_process_with_edge_attributes(): ) # weight, type -def test_dataset_process_without_edge_attributes(no_edge_attr_mock_hypergraph): +def test_dataset_process_without_edge_attributes(mock_no_edge_attr_hypergraph): """Test that process handles edges without attributes.""" with patch.object( - HIFConverter, "load_from_hif", return_value=no_edge_attr_mock_hypergraph + HIFConverter, "load_from_hif", return_value=mock_no_edge_attr_hypergraph ): dataset = AlgebraDataset() @@ -326,11 +445,11 @@ def test_dataset_process_without_edge_attributes(no_edge_attr_mock_hypergraph): assert dataset.hdata.edge_attr is None -def test_dataset_process_edge_index_format(four_node_mock_hypergraph): +def test_dataset_process_edge_index_format(mock_four_node_hypergraph): """Test that edge_index has correct format [node_ids, edge_ids].""" with patch.object( - HIFConverter, "load_from_hif", return_value=four_node_mock_hypergraph + HIFConverter, "load_from_hif", return_value=mock_four_node_hypergraph ): dataset = AlgebraDataset() @@ -365,10 +484,10 @@ def test_dataset_process_random_ids(): assert dataset.hdata.edge_attr.shape == (2, 0) # 2 edges, 0 attributes each -def test_getitem_index_list_empty(simple_mock_hypergraph): +def test_getitem_index_list_empty(mock_simple_hypergraph): """Test __getitem__ with empty index list raises ValueError.""" with patch.object( - HIFConverter, "load_from_hif", return_value=simple_mock_hypergraph + HIFConverter, "load_from_hif", return_value=mock_simple_hypergraph ): dataset = AlgebraDataset() @@ -376,10 +495,10 @@ def test_getitem_index_list_empty(simple_mock_hypergraph): dataset[[]] -def test_getitem_index_list_too_large(five_node_mock_hypergraph): +def test_getitem_index_list_too_large(mock_five_node_hypergraph): """Test __getitem__ with index list larger than number of nodes raises ValueError.""" with patch.object( - HIFConverter, "load_from_hif", return_value=five_node_mock_hypergraph + HIFConverter, "load_from_hif", return_value=mock_five_node_hypergraph ): dataset = AlgebraDataset() @@ -390,10 +509,10 @@ def test_getitem_index_list_too_large(five_node_mock_hypergraph): dataset[[0, 1, 2, 3, 4, 5]] -def test_getitem_index_out_of_bounds(four_node_mock_hypergraph): +def test_getitem_index_out_of_bounds(mock_four_node_hypergraph): """Test __getitem__ with out-of-bounds index raises IndexError.""" with patch.object( - HIFConverter, "load_from_hif", return_value=four_node_mock_hypergraph + HIFConverter, "load_from_hif", return_value=mock_four_node_hypergraph ): dataset = AlgebraDataset() @@ -401,10 +520,12 @@ def test_getitem_index_out_of_bounds(four_node_mock_hypergraph): dataset[4] -def test_getitem_single_index(sample_hypergraph): +def test_getitem_single_index(mock_sample_hypergraph): """Test __getitem__ with a single index.""" - with patch.object(HIFConverter, "load_from_hif", return_value=sample_hypergraph): + with patch.object( + HIFConverter, "load_from_hif", return_value=mock_sample_hypergraph + ): dataset = AlgebraDataset() node_data = dataset[1] @@ -412,11 +533,11 @@ def test_getitem_single_index(sample_hypergraph): assert node_data.edge_index.shape == (2, 0) -def test_getitem_list_index(four_node_mock_hypergraph): +def test_getitem_list_index(mock_four_node_hypergraph): """Test __getitem__ with a list of indices.""" with patch.object( - HIFConverter, "load_from_hif", return_value=four_node_mock_hypergraph + HIFConverter, "load_from_hif", return_value=mock_four_node_hypergraph ): dataset = AlgebraDataset() @@ -425,11 +546,11 @@ def test_getitem_list_index(four_node_mock_hypergraph): assert node_data_list.edge_index.shape == (2, 3) -def test_getitem_with_edge_attr(three_node_mock_weighted_hypergraph): +def test_getitem_with_edge_attr(mock_three_node_weighted_hypergraph): """Test __getitem__ returns correct edge_attr when present.""" with patch.object( - HIFConverter, "load_from_hif", return_value=three_node_mock_weighted_hypergraph + HIFConverter, "load_from_hif", return_value=mock_three_node_weighted_hypergraph ): dataset = AlgebraDataset() @@ -441,11 +562,11 @@ def test_getitem_with_edge_attr(three_node_mock_weighted_hypergraph): assert node_data.edge_attr[0].item() == 1 -def test_getitem_without_edge_attr(no_edge_attr_mock_hypergraph): +def test_getitem_without_edge_attr(mock_no_edge_attr_hypergraph): """Test __getitem__ returns None for edge_attr when not present.""" with patch.object( - HIFConverter, "load_from_hif", return_value=no_edge_attr_mock_hypergraph + HIFConverter, "load_from_hif", return_value=mock_no_edge_attr_hypergraph ): dataset = AlgebraDataset() @@ -453,11 +574,11 @@ def test_getitem_without_edge_attr(no_edge_attr_mock_hypergraph): assert node_data.edge_attr is None -def test_getitem_with_multiple_edges_attr(multiple_edges_attr_mock_hypergraph): +def test_getitem_with_multiple_edges_attr(mock_multiple_edges_attr_hypergraph): """Test __getitem__ correctly filters edge_attr for sampled edges.""" with patch.object( - HIFConverter, "load_from_hif", return_value=multiple_edges_attr_mock_hypergraph + HIFConverter, "load_from_hif", return_value=mock_multiple_edges_attr_hypergraph ): dataset = AlgebraDataset() @@ -553,7 +674,6 @@ def test_transform_attrs_empty_attrs(): class TestDataset(Dataset): DATASET_NAME = "TEST" - GDRIVE_FILE_ID = "test_id" dataset = TestDataset() @@ -586,7 +706,6 @@ def test_process_with_inconsistent_node_attributes(): class TestDataset(Dataset): DATASET_NAME = "TEST" - GDRIVE_FILE_ID = "test_id" dataset = TestDataset() @@ -621,7 +740,6 @@ def test_process_with_no_node_attributes_fallback(): class TestDataset(Dataset): DATASET_NAME = "TEST" - GDRIVE_FILE_ID = "test_id" dataset = TestDataset() @@ -650,7 +768,6 @@ def test_process_with_single_node_attribute(): class TestDataset(Dataset): DATASET_NAME = "TEST" - GDRIVE_FILE_ID = "test_id" dataset = TestDataset() @@ -683,7 +800,6 @@ def test_getitem_preserves_node_attributes(): class TestDataset(Dataset): DATASET_NAME = "TEST" - GDRIVE_FILE_ID = "test_id" dataset = TestDataset() @@ -713,7 +829,6 @@ def test_transform_attrs_with_attr_keys_padding(): class TestDataset(Dataset): DATASET_NAME = "TEST" - GDRIVE_FILE_ID = "test_id" dataset = TestDataset() diff --git a/pyproject.toml b/pyproject.toml index d9a8777..f472788 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,6 @@ readme = "README.md" requires-python = ">=3.10" dependencies = [ "fastjsonschema>=2.21.2", - "gdown>=5.2.1", "lightning>=2.5.5", "numpy>=1.240", "requests>=2.32.5",