From 4648342090115e8c042798b8be23dcd90d174412 Mon Sep 17 00:00:00 2001 From: LeoDiNino97 Date: Sun, 23 Nov 2025 17:53:58 +0100 Subject: [PATCH 01/13] Commit for TDL Challenge 2025 --- configs/dataset/graph/anytown.yaml | 48 +++ configs/dataset/graph/balerman.yaml | 48 +++ configs/dataset/graph/ctown.yaml | 48 +++ configs/dataset/graph/dtown.yaml | 48 +++ configs/dataset/graph/exn.yaml | 48 +++ configs/dataset/graph/ky1.yaml | 48 +++ configs/dataset/graph/ky10.yaml | 48 +++ configs/dataset/graph/ky6.yaml | 48 +++ configs/dataset/graph/ky8.yaml | 48 +++ configs/dataset/graph/ltown.yaml | 48 +++ configs/dataset/graph/modena.yaml | 48 +++ test/pipeline/test_pipeline.py | 12 +- topobench/data/datasets/wdn_dataset.py | 400 ++++++++++++++++++ .../data/loaders/graph/wdn_dataset_loader.py | 89 ++++ 14 files changed, 1024 insertions(+), 5 deletions(-) create mode 100644 configs/dataset/graph/anytown.yaml create mode 100644 configs/dataset/graph/balerman.yaml create mode 100644 configs/dataset/graph/ctown.yaml create mode 100644 configs/dataset/graph/dtown.yaml create mode 100644 configs/dataset/graph/exn.yaml create mode 100644 configs/dataset/graph/ky1.yaml create mode 100644 configs/dataset/graph/ky10.yaml create mode 100644 configs/dataset/graph/ky6.yaml create mode 100644 configs/dataset/graph/ky8.yaml create mode 100644 configs/dataset/graph/ltown.yaml create mode 100644 configs/dataset/graph/modena.yaml create mode 100644 topobench/data/datasets/wdn_dataset.py create mode 100644 topobench/data/loaders/graph/wdn_dataset_loader.py diff --git a/configs/dataset/graph/anytown.yaml b/configs/dataset/graph/anytown.yaml new file mode 100644 index 000000000..ffa79b4f4 --- /dev/null +++ b/configs/dataset/graph/anytown.yaml @@ -0,0 +1,48 @@ +# Dataset loader config +loader: + _target_: topobench.data.loaders.WDNDatasetLoader + parameters: + data_domain: graph + data_type: wdn_dataset + data_name: anytown + data_dir: 
${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} + regressors: ["pressure", "demand", "flowrate"] + target: ["head"] + temporal: False + num_scenarios: 1 + num_instants: 1 + +# Variables among which one can choose as regressors and target variables +# pressure NODE +# demand NODE +# flowrate EDGE +# velocity EDGE +# head NODE +# head_loss EDGE +# friction_factor EDGE +# Be careful when overriding task_level, num_features and num_edge_features + +# Dataset parameters +parameters: + task: regression + task_level: node + num_features: 2 + num_edge_features: 1 + num_classes: 1 + loss_type: mse + monitor_metric: mse + +# splits +split_params: + learning_setting: transductive + data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name} + data_seed: 0 + split_type: random + train_prop: 0.8 + k: 10 + +# Dataloader parameters +dataloader_params: + batch_size: 1 + num_workers: 1 + pin_memory: False diff --git a/configs/dataset/graph/balerman.yaml b/configs/dataset/graph/balerman.yaml new file mode 100644 index 000000000..42c7176c2 --- /dev/null +++ b/configs/dataset/graph/balerman.yaml @@ -0,0 +1,48 @@ +# Dataset loader config +loader: + _target_: topobench.data.loaders.WDNDatasetLoader + parameters: + data_domain: graph + data_type: wdn_dataset + data_name: balerman + data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} + regressors: ["pressure", "demand", "flowrate"] + target: ["head"] + temporal: False + num_scenarios: 1 + num_instants: 1 + +# Variables among which one can choose as regressors and target variables +# pressure NODE +# demand NODE +# flowrate EDGE +# velocity EDGE +# head NODE +# head_loss EDGE +# friction_factor EDGE +# Be careful when overriding task_level, num_features and num_edge_features + +# Dataset parameters +parameters: + task: regression + task_level: node + num_features: 2 + num_edge_features: 1 + num_classes: 1 + 
loss_type: mse + monitor_metric: mse + +# splits +split_params: + learning_setting: transductive + data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name} + data_seed: 0 + split_type: random + train_prop: 0.8 + k: 10 + +# Dataloader parameters +dataloader_params: + batch_size: 1 + num_workers: 1 + pin_memory: False diff --git a/configs/dataset/graph/ctown.yaml b/configs/dataset/graph/ctown.yaml new file mode 100644 index 000000000..18dfba744 --- /dev/null +++ b/configs/dataset/graph/ctown.yaml @@ -0,0 +1,48 @@ +# Dataset loader config +loader: + _target_: topobench.data.loaders.WDNDatasetLoader + parameters: + data_domain: graph + data_type: wdn_dataset + data_name: ctown + data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} + regressors: ["pressure", "demand", "flowrate"] + target: ["head"] + temporal: False + num_scenarios: 1 + num_instants: 1 + +# Variables among which one can choose as regressors and target variables +# pressure NODE +# demand NODE +# flowrate EDGE +# velocity EDGE +# head NODE +# head_loss EDGE +# friction_factor EDGE +# Be careful when overriding task_level, num_features and num_edge_features + +# Dataset parameters +parameters: + task: regression + task_level: node + num_features: 2 + num_edge_features: 1 + num_classes: 1 + loss_type: mse + monitor_metric: mse + +# splits +split_params: + learning_setting: transductive + data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name} + data_seed: 0 + split_type: random + train_prop: 0.8 + k: 10 + +# Dataloader parameters +dataloader_params: + batch_size: 1 + num_workers: 1 + pin_memory: False diff --git a/configs/dataset/graph/dtown.yaml b/configs/dataset/graph/dtown.yaml new file mode 100644 index 000000000..ccf10db67 --- /dev/null +++ b/configs/dataset/graph/dtown.yaml @@ -0,0 +1,48 @@ +# Dataset loader config +loader: + _target_: topobench.data.loaders.WDNDatasetLoader + parameters: + 
data_domain: graph + data_type: wdn_dataset + data_name: dtown + data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} + regressors: ["pressure", "demand", "flowrate"] + target: ["head"] + temporal: False + num_scenarios: 1 + num_instants: 1 + +# Variables among which one can choose as regressors and target variables +# pressure NODE +# demand NODE +# flowrate EDGE +# velocity EDGE +# head NODE +# head_loss EDGE +# friction_factor EDGE +# Be careful when overriding task_level, num_features and num_edge_features + +# Dataset parameters +parameters: + task: regression + task_level: node + num_features: 2 + num_edge_features: 1 + num_classes: 1 + loss_type: mse + monitor_metric: mse + +# splits +split_params: + learning_setting: transductive + data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name} + data_seed: 0 + split_type: random + train_prop: 0.8 + k: 10 + +# Dataloader parameters +dataloader_params: + batch_size: 1 + num_workers: 1 + pin_memory: False diff --git a/configs/dataset/graph/exn.yaml b/configs/dataset/graph/exn.yaml new file mode 100644 index 000000000..eb76e744f --- /dev/null +++ b/configs/dataset/graph/exn.yaml @@ -0,0 +1,48 @@ +# Dataset loader config +loader: + _target_: topobench.data.loaders.WDNDatasetLoader + parameters: + data_domain: graph + data_type: wdn_dataset + data_name: exn + data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} + regressors: ["pressure", "demand", "flowrate"] + target: ["head"] + temporal: False + num_scenarios: 1 + num_instants: 1 + +# Variables among which one can choose as regressors and target variables +# pressure NODE +# demand NODE +# flowrate EDGE +# velocity EDGE +# head NODE +# head_loss EDGE +# friction_factor EDGE +# Be careful when overriding task_level, num_features and num_edge_features + +# Dataset parameters +parameters: + task: regression + task_level: node + 
num_features: 2 + num_edge_features: 1 + num_classes: 1 + loss_type: mse + monitor_metric: mse + +# splits +split_params: + learning_setting: transductive + data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name} + data_seed: 0 + split_type: random + train_prop: 0.8 + k: 10 + +# Dataloader parameters +dataloader_params: + batch_size: 1 + num_workers: 1 + pin_memory: False diff --git a/configs/dataset/graph/ky1.yaml b/configs/dataset/graph/ky1.yaml new file mode 100644 index 000000000..0b862599a --- /dev/null +++ b/configs/dataset/graph/ky1.yaml @@ -0,0 +1,48 @@ +# Dataset loader config +loader: + _target_: topobench.data.loaders.WDNDatasetLoader + parameters: + data_domain: graph + data_type: wdn_dataset + data_name: ky1 + data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} + regressors: ["pressure", "demand", "flowrate"] + target: ["head"] + temporal: False + num_scenarios: 1 + num_instants: 1 + +# Variables among which one can choose as regressors and target variables +# pressure NODE +# demand NODE +# flowrate EDGE +# velocity EDGE +# head NODE +# head_loss EDGE +# friction_factor EDGE +# Be careful when overriding task_level, num_features and num_edge_features + +# Dataset parameters +parameters: + task: regression + task_level: node + num_features: 2 + num_edge_features: 1 + num_classes: 1 + loss_type: mse + monitor_metric: mse + +# splits +split_params: + learning_setting: transductive + data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name} + data_seed: 0 + split_type: random + train_prop: 0.8 + k: 10 + +# Dataloader parameters +dataloader_params: + batch_size: 1 + num_workers: 1 + pin_memory: False diff --git a/configs/dataset/graph/ky10.yaml b/configs/dataset/graph/ky10.yaml new file mode 100644 index 000000000..b79d7ab0e --- /dev/null +++ b/configs/dataset/graph/ky10.yaml @@ -0,0 +1,48 @@ +# Dataset loader config +loader: + _target_: 
topobench.data.loaders.WDNDatasetLoader + parameters: + data_domain: graph + data_type: wdn_dataset + data_name: ky10 + data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} + regressors: ["pressure", "demand", "flowrate"] + target: ["head"] + temporal: False + num_scenarios: 1 + num_instants: 1 + +# Variables among which one can choose as regressors and target variables +# pressure NODE +# demand NODE +# flowrate EDGE +# velocity EDGE +# head NODE +# head_loss EDGE +# friction_factor EDGE +# Be careful when overriding task_level, num_features and num_edge_features + +# Dataset parameters +parameters: + task: regression + task_level: node + num_features: 2 + num_edge_features: 1 + num_classes: 1 + loss_type: mse + monitor_metric: mse + +# splits +split_params: + learning_setting: transductive + data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name} + data_seed: 0 + split_type: random + train_prop: 0.8 + k: 10 + +# Dataloader parameters +dataloader_params: + batch_size: 1 + num_workers: 1 + pin_memory: False diff --git a/configs/dataset/graph/ky6.yaml b/configs/dataset/graph/ky6.yaml new file mode 100644 index 000000000..1e28f67a7 --- /dev/null +++ b/configs/dataset/graph/ky6.yaml @@ -0,0 +1,48 @@ +# Dataset loader config +loader: + _target_: topobench.data.loaders.WDNDatasetLoader + parameters: + data_domain: graph + data_type: wdn_dataset + data_name: ky6 + data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} + regressors: ["pressure", "demand", "flowrate"] + target: ["head"] + temporal: False + num_scenarios: 1 + num_instants: 1 + +# Variables among which one can choose as regressors and target variables +# pressure NODE +# demand NODE +# flowrate EDGE +# velocity EDGE +# head NODE +# head_loss EDGE +# friction_factor EDGE +# Be careful when overriding task_level, num_features and num_edge_features + +# Dataset parameters 
+parameters: + task: regression + task_level: node + num_features: 2 + num_edge_features: 1 + num_classes: 1 + loss_type: mse + monitor_metric: mse + +# splits +split_params: + learning_setting: transductive + data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name} + data_seed: 0 + split_type: random + train_prop: 0.8 + k: 10 + +# Dataloader parameters +dataloader_params: + batch_size: 1 + num_workers: 1 + pin_memory: False diff --git a/configs/dataset/graph/ky8.yaml b/configs/dataset/graph/ky8.yaml new file mode 100644 index 000000000..e218f361b --- /dev/null +++ b/configs/dataset/graph/ky8.yaml @@ -0,0 +1,48 @@ +# Dataset loader config +loader: + _target_: topobench.data.loaders.WDNDatasetLoader + parameters: + data_domain: graph + data_type: wdn_dataset + data_name: ky8 + data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} + regressors: ["pressure", "demand", "flowrate"] + target: ["head"] + temporal: False + num_scenarios: 1 + num_instants: 1 + +# Variables among which one can choose as regressors and target variables +# pressure NODE +# demand NODE +# flowrate EDGE +# velocity EDGE +# head NODE +# head_loss EDGE +# friction_factor EDGE +# Be careful when overriding task_level, num_features and num_edge_features + +# Dataset parameters +parameters: + task: regression + task_level: node + num_features: 2 + num_edge_features: 1 + num_classes: 1 + loss_type: mse + monitor_metric: mse + +# splits +split_params: + learning_setting: transductive + data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name} + data_seed: 0 + split_type: random + train_prop: 0.8 + k: 10 + +# Dataloader parameters +dataloader_params: + batch_size: 1 + num_workers: 1 + pin_memory: False diff --git a/configs/dataset/graph/ltown.yaml b/configs/dataset/graph/ltown.yaml new file mode 100644 index 000000000..45e94b815 --- /dev/null +++ b/configs/dataset/graph/ltown.yaml @@ -0,0 +1,48 
@@ +# Dataset loader config +loader: + _target_: topobench.data.loaders.WDNDatasetLoader + parameters: + data_domain: graph + data_type: wdn_dataset + data_name: ltown + data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} + regressors: ["pressure", "demand", "flowrate"] + target: ["head"] + temporal: False + num_scenarios: 1 + num_instants: 1 + +# Variables among which one can choose as regressors and target variables +# pressure NODE +# demand NODE +# flowrate EDGE +# velocity EDGE +# head NODE +# head_loss EDGE +# friction_factor EDGE +# Be careful when overriding task_level, num_features and num_edge_features + +# Dataset parameters +parameters: + task: regression + task_level: node + num_features: 2 + num_edge_features: 1 + num_classes: 1 + loss_type: mse + monitor_metric: mse + +# splits +split_params: + learning_setting: transductive + data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name} + data_seed: 0 + split_type: random + train_prop: 0.8 + k: 10 + +# Dataloader parameters +dataloader_params: + batch_size: 1 + num_workers: 1 + pin_memory: False diff --git a/configs/dataset/graph/modena.yaml b/configs/dataset/graph/modena.yaml new file mode 100644 index 000000000..348ad66b9 --- /dev/null +++ b/configs/dataset/graph/modena.yaml @@ -0,0 +1,48 @@ +# Dataset loader config +loader: + _target_: topobench.data.loaders.WDNDatasetLoader + parameters: + data_domain: graph + data_type: wdn_dataset + data_name: modena + data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} + regressors: ["pressure", "demand", "flowrate"] + target: ["head"] + temporal: False + num_scenarios: 1 + num_instants: 1 + +# Variables among which one can choose as regressors and target variables +# pressure NODE +# demand NODE +# flowrate EDGE +# velocity EDGE +# head NODE +# head_loss EDGE +# friction_factor EDGE +# Be careful when overriding task_level, 
num_features and num_edge_features + +# Dataset parameters +parameters: + task: regression + task_level: node + num_features: 2 + num_edge_features: 1 + num_classes: 1 + loss_type: mse + monitor_metric: mse + +# splits +split_params: + learning_setting: transductive + data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name} + data_seed: 0 + split_type: random + train_prop: 0.8 + k: 10 + +# Dataloader parameters +dataloader_params: + batch_size: 1 + num_workers: 1 + pin_memory: False diff --git a/test/pipeline/test_pipeline.py b/test/pipeline/test_pipeline.py index 785987159..5d8ca8c93 100644 --- a/test/pipeline/test_pipeline.py +++ b/test/pipeline/test_pipeline.py @@ -3,9 +3,9 @@ import hydra from test._utils.simplified_pipeline import run - -DATASET = "graph/MUTAG" # ADD YOUR DATASET HERE -MODELS = ["graph/gcn", "cell/topotune", "simplicial/topotune"] # ADD ONE OR SEVERAL MODELS OF YOUR CHOICE HERE +print('Performing test pipeline...') +DATASET = "graph/anytown" # ADD YOUR DATASET HERE +MODELS = ["graph/gcn", ] # ADD ONE OR SEVERAL MODELS OF YOUR CHOICE HERE class TestPipeline: @@ -23,7 +23,7 @@ def test_pipeline(self): config_name="run.yaml", overrides=[ f"model={MODEL}", - f"dataset={DATASET}", # IF YOU IMPLEMENT A LARGE DATASET WITH AN OPTION TO USE A SLICE OF IT, ADD BELOW THE CORRESPONDING OPTION + f"dataset={DATASET}", "trainer.max_epochs=2", "trainer.min_epochs=1", "trainer.check_val_every_n_epoch=1", @@ -32,4 +32,6 @@ def test_pipeline(self): ], return_hydra_config=True ) - run(cfg) \ No newline at end of file + run(cfg) + print('Done!') + diff --git a/topobench/data/datasets/wdn_dataset.py b/topobench/data/datasets/wdn_dataset.py new file mode 100644 index 000000000..58753a25c --- /dev/null +++ b/topobench/data/datasets/wdn_dataset.py @@ -0,0 +1,400 @@ +"""Dataset class for WDN datasets.""" + +import json +import os +import os.path as osp +from typing import ClassVar + +import pandas as pd +import torch +from omegaconf import 
DictConfig +from torch_geometric.data import Data, InMemoryDataset, extract_zip +from torch_geometric.io import fs + +from topobench.data.utils import download_file_from_link + +# Main class for the dataset # + + +class WDNDataset(InMemoryDataset): + """Super-class to load datasets from "Large-Scale Multipurpose Benchmark Datasets For Assessing Data-Driven Deep Learning Approaches For Water Distribution Networks" (2023) with some configurables. + + Parameters + ---------- + root : str + Root directory where the dataset will be saved. + parameters : DictConfig + Configuration parameters for the dataset. + + Attributes + ---------- + URLS (dict): Name of the specific dataset to be istantiated. + FILE_FORMAT (dict): File format of the dataset. + RAW_FILE_NAMES (dict): List of file names of the dataset. + """ + + URL: ClassVar[str] = None + FILE_FORMAT: ClassVar[str] = "zip" + + def __init__(self, root: str, parameters: DictConfig) -> None: + self.root = root + self.parameters = parameters + super().__init__(root) + + out = fs.torch_load(self.processed_paths[0]) + assert len(out) in (3, 4) + + if len(out) == 3: + data, self.slices, self.sizes = out + data_cls = Data + else: + data, self.slices, self.sizes, data_cls = out + + if not isinstance(data, dict): + self.data = data + else: + self.data = data_cls.from_dict(data) + + assert isinstance(self._data, Data) + + @property + def raw_dir(self) -> str: + """Return the path to the raw directory of the dataset. + + Returns + ------- + str + Path to the raw directory. + """ + return osp.join(self.root, self.parameters.data_name, "raw") + + @property + def processed_dir(self) -> str: + """Return the path to the processed directory of the dataset. + + Returns + ------- + str + Path to the processed directory. + """ + return osp.join(self.root, self.parameters.data_name, "processed") + + @property + def raw_file_names(self) -> list[str]: + """Return the raw file names for the dataset. 
+ + Returns + ------- + list[str] + List of raw file names. + """ + return [ + "pressure.csv", + "demand.csv", + "flowrate.csv", + "velocity.csv", + "head.csv", + "head_loss.csv", + "friction_factor.csv", + "attrs.json", + ] + + @property + def processed_file_names(self) -> str: + """Return the processed file name for the dataset. + + Returns + ------- + str + Processed file name. + """ + return "data.pt" + + def download(self) -> None: + r"""Download the dataset from a URL and saves it to the raw directory. + + Raises: + FileNotFoundError: If the dataset URL is not found. + """ + if self.URL is None or self.FILE_FORMAT is None: + raise FileNotFoundError( + f"URL or FILE_FORMAT not set for {self.parameters.data_name}" + ) + + download_file_from_link( + file_link=self.URL, + path_to_save=self.raw_dir, + dataset_name=self.parameters.data_name, + file_format=self.FILE_FORMAT, + ) + + # Extract zip + path = osp.join( + self.raw_dir, f"{self.parameters.data_name}.{self.FILE_FORMAT}" + ) + extract_zip(path, self.raw_dir) + + # Delete zip file + os.unlink(path) + + # Remove unretained files + retain_files = getattr( + self.parameters, "retain_files", self.raw_file_names + ) + + for f in self.raw_file_names: + if f not in retain_files and osp.exists(osp.join(self.raw_dir, f)): + os.remove(osp.join(self.raw_dir, f)) + + def process(self) -> None: + r"""Handle the data for the dataset. + + - Builds the graph from metadata + - Remaps node identifiers to progressive idxs + - Retrieves the correct temporal dimension + - Retrieves the regressors and target variables + - For each scenario, builds: + - A tensor (num_nodes, num_features, time_stamps) + for node features; + - A tensor (num_edges, num_features, time_stamps) + for edge features; + - A tensor (*, num_features, times_tamps) + for target variables accordingly to the target domain. + - Collated in a PyG Data object each of this graph adding + an identifier to the related scenario + - Save processed data. 
+ """ + attributes_path = osp.join(self.raw_dir, "attrs.json") + + with open(attributes_path) as f: + attributes_data = json.load(f) + + # --- Build edge_index --- + adj_list = attributes_data["adj_list"] + edge_index, _ = zip( + *[((int(src), int(dst)), eid) for src, dst, eid in adj_list], + strict=False, + ) + + edge_index = ( + torch.tensor(edge_index, dtype=torch.long).t().contiguous() + ) + + # --- Remapping nodes to common ids --- + # Suppose original node IDs are in 'edge_index' + edge_index = edge_index.clone() + + # Map node IDs to 0..num_nodes-1 + unique_nodes = torch.unique(edge_index) + node_id_map = {old.item(): i for i, old in enumerate(unique_nodes)} + + for i in range(edge_index.size(1)): + edge_index[0, i] = node_id_map[edge_index[0, i].item()] + edge_index[1, i] = node_id_map[edge_index[1, i].item()] + + # --- Scenarios and time-instants selection --- + total_scenarios = attributes_data["gen_batch_size"] + total_duration = attributes_data["duration"] + num_scenarios = getattr( + self.parameters, "num_scenarios", total_scenarios + ) + num_instants = getattr(self.parameters, "num_instants", total_duration) + + # --- Variables to retain --- + regressors = getattr(self.parameters, "regressors", []) + targets = getattr(self.parameters, "targets", []) + + assert len(targets) == 1, ( + f"Expected exactly one target variable, got {len(targets)}." 
+ ) + + retain_files = list(set(regressors + targets)) + + # --- Load all requested CSVs --- + data_tensors = {} + for file_name in retain_files: + csv_path = osp.join(self.raw_dir, f"{file_name}.csv") + if not osp.exists(csv_path): + continue + df = pd.read_csv(csv_path, index_col=0) + regressor_shape = df.shape[1] + + # Convert to (num_scenarios, duration, n) + tensor = torch.tensor(df.values, dtype=torch.float32) + tensor = tensor.reshape( + total_scenarios, total_duration, regressor_shape + ) + + # Select temporal subset + tensor = tensor[:num_scenarios, :num_instants, :] + data_tensors[file_name] = tensor + + # --- Helper function: determine if variable is node-level or edge-level --- + def is_edge_var(var_name: str) -> bool: + """Determine whether a variable name corresponds to an edge-level variable. + + Parameters + ---------- + var_name : str + The name of the variable to check. + + Returns + ------- + bool + ``True`` if the variable is an edge-level variable, ``False`` otherwise. 
+ """ + + return var_name in [ + "flowrate", + "velocity", + "head_loss", + "friction_factor", + ] + + # --- Build graph samples --- + graph_samples = [] + for i in range(num_scenarios): + node_regressors, edge_regressors = [], [] + target_signals = [] + + # Separate by type and purpose + for var_name in regressors: + if var_name not in data_tensors: + continue + if is_edge_var(var_name): + edge_regressors.append( + data_tensors[var_name][i].unsqueeze(0) + ) + else: + node_regressors.append( + data_tensors[var_name][i].unsqueeze(0) + ) + + for var_name in targets: + if var_name not in data_tensors: + continue + if is_edge_var(var_name): + target_signals.append( + data_tensors[var_name][i].unsqueeze(0) + ) + else: + target_signals.append( + data_tensors[var_name][i].unsqueeze(0) + ) + + # Assemble node and edge attributes + x = torch.cat(node_regressors, dim=0) if node_regressors else None + edge_attr = ( + torch.cat(edge_regressors, dim=0) if edge_regressors else None + ) + + # Assemble target signals + y = torch.cat(target_signals, dim=0) + + # Permute to shape [F, T, N] -> [N, F, T] + if x is not None and x.dim() == 3: + x = x.permute(2, 0, 1) + + if edge_attr is not None and edge_attr.dim() == 3: + edge_attr = edge_attr.permute(2, 0, 1) + + if y is not None and y.dim() == 3: + y = y.permute(2, 0, 1) + + # Get ride of last channel if temporal parameter is False + if not self.parameters.temporal: + x = x.squeeze(dim=2) + edge_attr = edge_attr.squeeze(dim=2) + y = y.squeeze(dim=2) + + # Create Data object + data = Data( + x=x, + edge_index=edge_index, + edge_attr=edge_attr, + y=y, + ) + + # Add a graph identifier for the chosen scenario + data.scenario_id = i + + # Collect generated graph samples + graph_samples.append(data) + + # --- Collate and save --- + self.data, self.slices = self.collate(graph_samples) + self._data_list = None + + fs.torch_save( + (self._data.to_dict(), self.slices, {}, self._data.__class__), + self.processed_paths[0], + ) + + +# Subclasses 
for each dataset # + + +class AnytownDataset(WDNDataset): + """Dataset generated with the Anytown WDN model.""" + + URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_Anytown_20240524_1202_csvdir_20240527_1205.zip?download=1" + + +class BalermanDataset(WDNDataset): + """Dataset generated with the Balerma WDN model.""" + + URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_balerman_20240524_1233_csvdir_20240527_1205.zip?download=1" + + +class CTownDataset(WDNDataset): + """Dataset generated with the C-Town WDN model.""" + + URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_ctown_20240524_1231_csvdir_20240527_1208.zip?download=1" + + +class DTownDataset(WDNDataset): + """Dataset generated with the D-Town WDN model.""" + + URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_d-town_20240525_1755_csvdir_20240527_1210.zip?download=1" + + +class EXNDataset(WDNDataset): + """Dataset generated with the EXN WDN model.""" + + URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_EXN_20240525_0928_csvdir_20240527_1237.zip?download=1" + + +class KY1Dataset(WDNDataset): + """Dataset generated with the K1 WDN model.""" + + URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_ky1_20240524_1229_csvdir_20240527_1218.zip?download=1" + + +class KY6Dataset(WDNDataset): + """Dataset generated with the K6 WDN model.""" + + URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_ky6_20240524_1228_csvdir_20240527_1223.zip?download=1" + + +class KY8Dataset(WDNDataset): + """Dataset generated with the K8 WDN model.""" + + URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_ky8_20240524_1228_csvdir_20240527_1225.zip?download=1" + + +class KY10Dataset(WDNDataset): + """Dataset generated with the K10 WDN model.""" + + URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_ky10_20240524_1229_csvdir_20240527_1218.zip?download=1" + + +class LTownDataset(WDNDataset): + """Dataset 
generated with the L-Town WDN model.""" + + URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_L-TOWN_Real_20240524_1228_csvdir_20240527_1232.zip?download=1" + + +class ModenaDataset(WDNDataset): + """Dataset generated with the Modena WDN model.""" + + URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_moderna_20240524_1230_csvdir_20240527_1212.zip?download=1" diff --git a/topobench/data/loaders/graph/wdn_dataset_loader.py b/topobench/data/loaders/graph/wdn_dataset_loader.py new file mode 100644 index 000000000..b5e624c03 --- /dev/null +++ b/topobench/data/loaders/graph/wdn_dataset_loader.py @@ -0,0 +1,89 @@ +"""Loader for Water Distribution Network dataset.""" + +from __future__ import annotations + +from typing import Any + +from omegaconf import DictConfig +from torch_geometric.data import Dataset + +from topobench.data.datasets.wdn_dataset import ( + AnytownDataset, + BalermanDataset, + CTownDataset, + DTownDataset, + EXNDataset, + KY1Dataset, + KY6Dataset, + KY8Dataset, + KY10Dataset, + LTownDataset, + ModenaDataset, +) +from topobench.data.loaders.base import AbstractLoader + + +class WDNDatasetLoader(AbstractLoader): + """ + Load WDN dataset. + + Parameters + ---------- + parameters : DictConfig + Configuration parameters containing: + - data_name: Name of the dataset + - data_dir: Root directory for data + - regressors: Observed variables + - target: Target variables of the regression problem + - temporal: Whether to consider the temporal dimension or not + - num_scenarios: Number of scenarios to be considered + - num_instants: Number of observations to be considered within each scenario. 
+ """ + + # This map routes a data_name to a class of WDNDataset + + _DATASETS: dict[str, type[Any]] = { + "anytown": AnytownDataset, + "balerman": BalermanDataset, + "ctown": CTownDataset, + "dtown": DTownDataset, + "exn": EXNDataset, + "ky1": KY1Dataset, + "ky6": KY6Dataset, + "ky8": KY8Dataset, + "ky10": KY10Dataset, + "ltown": LTownDataset, + "modena": ModenaDataset, + } + + def __init__(self, parameters: DictConfig) -> None: + super().__init__(parameters) + + def load_dataset(self) -> Dataset: + """ + Load the chosen WDN dataset. + + Returns + ------- + WDNDataset + The loaded WDN dataset with the appropriate `data_dir`. + + Raises + ------ + RuntimeError + If dataset loading fails. + """ + name = self.parameters.data_name.lower() + + try: + dataset_cls = type(self)._DATASETS[name] + except KeyError as err: + raise RuntimeError( + f"Unknown dataset '{name}'. " + f"Available datasets: {list(type(self)._DATASETS.keys())}" + ) from err + + return dataset_cls( + root=str(self.root_data_dir), + parameters=self.parameters, + ) From 2561a0ecf7c36d49566af7f6663aaeafa273ea4a Mon Sep 17 00:00:00 2001 From: LeoDiNino97 Date: Sun, 23 Nov 2025 23:21:21 +0100 Subject: [PATCH 02/13] Update files before merge --- test/pipeline/test_pipeline.py | 7 ++++++- topobench/data/datasets/wdn_dataset.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/test/pipeline/test_pipeline.py b/test/pipeline/test_pipeline.py index 5d8ca8c93..f1f491a41 100644 --- a/test/pipeline/test_pipeline.py +++ b/test/pipeline/test_pipeline.py @@ -4,7 +4,7 @@ from test._utils.simplified_pipeline import run print('Performing test pipeline...') -DATASET = "graph/anytown" # ADD YOUR DATASET HERE +DATASET = "graph/balerman" # ADD YOUR DATASET HERE MODELS = ["graph/gcn", ] # ADD ONE OR SEVERAL MODELS OF YOUR CHOICE HERE @@ -35,3 +35,8 @@ def test_pipeline(self): run(cfg) print('Done!') +# --- Run the pipeline --- +if __name__ == "__main__": + pipeline = TestPipeline() + pipeline.setup_method() 
+ pipeline.test_pipeline() \ No newline at end of file diff --git a/topobench/data/datasets/wdn_dataset.py b/topobench/data/datasets/wdn_dataset.py index 58753a25c..396684382 100644 --- a/topobench/data/datasets/wdn_dataset.py +++ b/topobench/data/datasets/wdn_dataset.py @@ -202,7 +202,7 @@ def process(self) -> None: # --- Variables to retain --- regressors = getattr(self.parameters, "regressors", []) - targets = getattr(self.parameters, "targets", []) + targets = getattr(self.parameters, "target", []) assert len(targets) == 1, ( f"Expected exactly one target variable, got {len(targets)}." From c390306740fd67cf2d0902609c158fc9ccd63b1c Mon Sep 17 00:00:00 2001 From: LeoDiNino97 Date: Sun, 23 Nov 2025 23:29:46 +0100 Subject: [PATCH 03/13] Minor changes --- test/pipeline/test_pipeline.py | 9 +-------- topobench/data/datasets/wdn_dataset.py | 3 +++ 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/test/pipeline/test_pipeline.py b/test/pipeline/test_pipeline.py index f1f491a41..02b272388 100644 --- a/test/pipeline/test_pipeline.py +++ b/test/pipeline/test_pipeline.py @@ -3,8 +3,7 @@ import hydra from test._utils.simplified_pipeline import run -print('Performing test pipeline...') -DATASET = "graph/balerman" # ADD YOUR DATASET HERE +DATASET = "graph/anytown" # ADD YOUR DATASET HERE MODELS = ["graph/gcn", ] # ADD ONE OR SEVERAL MODELS OF YOUR CHOICE HERE @@ -34,9 +33,3 @@ def test_pipeline(self): ) run(cfg) print('Done!') - -# --- Run the pipeline --- -if __name__ == "__main__": - pipeline = TestPipeline() - pipeline.setup_method() - pipeline.test_pipeline() \ No newline at end of file diff --git a/topobench/data/datasets/wdn_dataset.py b/topobench/data/datasets/wdn_dataset.py index 396684382..6ef5d5657 100644 --- a/topobench/data/datasets/wdn_dataset.py +++ b/topobench/data/datasets/wdn_dataset.py @@ -307,6 +307,9 @@ def is_edge_var(var_name: str) -> bool: edge_attr = edge_attr.squeeze(dim=2) y = y.squeeze(dim=2) + # Label must be one-dimensional (only one 
feature supported currently) + y = y.squeeze(dim=1) + # Create Data object data = Data( x=x, From 0da990d0ed778417c88aa050f4cfe67170dad930 Mon Sep 17 00:00:00 2001 From: LeoDiNino97 Date: Mon, 24 Nov 2025 14:23:12 +0100 Subject: [PATCH 04/13] Corrected node and edge remapping --- topobench/data/datasets/wdn_dataset.py | 126 +++++++++++++------------ 1 file changed, 64 insertions(+), 62 deletions(-) diff --git a/topobench/data/datasets/wdn_dataset.py b/topobench/data/datasets/wdn_dataset.py index 6ef5d5657..0afb618a3 100644 --- a/topobench/data/datasets/wdn_dataset.py +++ b/topobench/data/datasets/wdn_dataset.py @@ -169,28 +169,25 @@ def process(self) -> None: with open(attributes_path) as f: attributes_data = json.load(f) - # --- Build edge_index --- + # --- Build edge_index with edge IDs --- adj_list = attributes_data["adj_list"] - edge_index, _ = zip( - *[((int(src), int(dst)), eid) for src, dst, eid in adj_list], - strict=False, - ) - - edge_index = ( - torch.tensor(edge_index, dtype=torch.long).t().contiguous() - ) - # --- Remapping nodes to common ids --- - # Suppose original node IDs are in 'edge_index' - edge_index = edge_index.clone() + # Extract all unique nodes + all_nodes = {src for src, _, _ in adj_list} | { + dst for _, dst, _ in adj_list + } + node_id_map = {old: i for i, old in enumerate(sorted(all_nodes))} - # Map node IDs to 0..num_nodes-1 - unique_nodes = torch.unique(edge_index) - node_id_map = {old.item(): i for i, old in enumerate(unique_nodes)} + # Remap edges to integers and collect edge IDs + edge_index_list = [] + edge_ids = [] + for src, dst, eid in adj_list: + edge_index_list.append((node_id_map[src], node_id_map[dst])) + edge_ids.append(eid) - for i in range(edge_index.size(1)): - edge_index[0, i] = node_id_map[edge_index[0, i].item()] - edge_index[1, i] = node_id_map[edge_index[1, i].item()] + edge_index = ( + torch.tensor(edge_index_list, dtype=torch.long).t().contiguous() + ) # --- Scenarios and time-instants selection --- 
total_scenarios = attributes_data["gen_batch_size"] @@ -212,20 +209,19 @@ def process(self) -> None: # --- Load all requested CSVs --- data_tensors = {} + csv_columns = {} # store column names for each CSV for file_name in retain_files: csv_path = osp.join(self.raw_dir, f"{file_name}.csv") if not osp.exists(csv_path): continue df = pd.read_csv(csv_path, index_col=0) - regressor_shape = df.shape[1] - - # Convert to (num_scenarios, duration, n) + csv_columns[file_name] = df.columns.tolist() tensor = torch.tensor(df.values, dtype=torch.float32) + # reshape to (scenarios, duration, features) tensor = tensor.reshape( - total_scenarios, total_duration, regressor_shape + total_scenarios, total_duration, df.shape[1] ) - - # Select temporal subset + # select temporal subset tensor = tensor[:num_scenarios, :num_instants, :] data_tensors[file_name] = tensor @@ -251,64 +247,74 @@ def is_edge_var(var_name: str) -> bool: "friction_factor", ] - # --- Build graph samples --- + # --- Reorder node features according to node_id_map --- + unique_nodes = torch.unique(edge_index) + node_order = [n.item() for n in unique_nodes] + graph_samples = [] for i in range(num_scenarios): node_regressors, edge_regressors = [], [] target_signals = [] - # Separate by type and purpose - for var_name in regressors: + # Node features + for var_name in regressors + targets: if var_name not in data_tensors: continue + tensor = data_tensors[var_name][ + i + ] # shape [T, num_edges or num_nodes] if is_edge_var(var_name): - edge_regressors.append( - data_tensors[var_name][i].unsqueeze(0) - ) + # Reorder columns to match edge_index order via edge_ids + tensor = tensor[ + :, + [ + csv_columns[var_name].index(str(eid)) + for eid in edge_ids + ], + ] + if var_name in regressors: + edge_regressors.append(tensor.unsqueeze(0)) + else: + target_signals.append(tensor.unsqueeze(0)) else: - node_regressors.append( - data_tensors[var_name][i].unsqueeze(0) - ) - - for var_name in targets: - if var_name not in 
data_tensors: - continue - if is_edge_var(var_name): - target_signals.append( - data_tensors[var_name][i].unsqueeze(0) - ) - else: - target_signals.append( - data_tensors[var_name][i].unsqueeze(0) - ) - - # Assemble node and edge attributes + # Node-level features: reorder according to node_order + tensor = tensor[ + :, + [ + csv_columns[var_name].index(str(n)) + for n in node_order + ], + ] + if var_name in regressors: + node_regressors.append(tensor.unsqueeze(0)) + else: + target_signals.append(tensor.unsqueeze(0)) + + # Assemble features x = torch.cat(node_regressors, dim=0) if node_regressors else None edge_attr = ( torch.cat(edge_regressors, dim=0) if edge_regressors else None ) - - # Assemble target signals y = torch.cat(target_signals, dim=0) - # Permute to shape [F, T, N] -> [N, F, T] + # Permute to [N, F, T] if x is not None and x.dim() == 3: x = x.permute(2, 0, 1) - if edge_attr is not None and edge_attr.dim() == 3: edge_attr = edge_attr.permute(2, 0, 1) - if y is not None and y.dim() == 3: y = y.permute(2, 0, 1) - # Get ride of last channel if temporal parameter is False + # Drop last dim if temporal=False if not self.parameters.temporal: - x = x.squeeze(dim=2) - edge_attr = edge_attr.squeeze(dim=2) - y = y.squeeze(dim=2) + x = x.squeeze(-1) if x is not None else None + edge_attr = ( + edge_attr.squeeze(-1) if edge_attr is not None else None + ) + y = y.squeeze(-1) if y is not None else None - # Label must be one-dimensional (only one feature supported currently) - y = y.squeeze(dim=1) + # Squeeze feature dim for targets (currently only one target allowed) + y = y.squeeze(1) if y is not None else None # Create Data object data = Data( @@ -318,16 +324,12 @@ def is_edge_var(var_name: str) -> bool: y=y, ) - # Add a graph identifier for the chosen scenario data.scenario_id = i - - # Collect generated graph samples graph_samples.append(data) # --- Collate and save --- self.data, self.slices = self.collate(graph_samples) self._data_list = None - fs.torch_save( 
(self._data.to_dict(), self.slices, {}, self._data.__class__), self.processed_paths[0], From d367ca5d9d29ff9663808ad3a65902f099cf2710 Mon Sep 17 00:00:00 2001 From: LeoDiNino97 Date: Mon, 24 Nov 2025 14:47:12 +0100 Subject: [PATCH 05/13] Minor corrections for workability --- test/pipeline/test_pipeline.py | 6 ++++++ topobench/data/datasets/wdn_dataset.py | 15 ++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/test/pipeline/test_pipeline.py b/test/pipeline/test_pipeline.py index 02b272388..2c4f13a21 100644 --- a/test/pipeline/test_pipeline.py +++ b/test/pipeline/test_pipeline.py @@ -33,3 +33,9 @@ def test_pipeline(self): ) run(cfg) print('Done!') + +# --- Run the pipeline --- +if __name__ == "__main__": + pipeline = TestPipeline() + pipeline.setup_method() + pipeline.test_pipeline() diff --git a/topobench/data/datasets/wdn_dataset.py b/topobench/data/datasets/wdn_dataset.py index 0afb618a3..2af7842de 100644 --- a/topobench/data/datasets/wdn_dataset.py +++ b/topobench/data/datasets/wdn_dataset.py @@ -172,11 +172,12 @@ def process(self) -> None: # --- Build edge_index with edge IDs --- adj_list = attributes_data["adj_list"] - # Extract all unique nodes + # Extract all unique nodes and remap them to contiguous indices all_nodes = {src for src, _, _ in adj_list} | { dst for _, dst, _ in adj_list } - node_id_map = {old: i for i, old in enumerate(sorted(all_nodes))} + node_id_map = {node: i for i, node in enumerate(sorted(all_nodes))} + id_node_map_pivot = {v: k for k, v in node_id_map.items()} # Remap edges to integers and collect edge IDs edge_index_list = [] @@ -217,11 +218,13 @@ def process(self) -> None: df = pd.read_csv(csv_path, index_col=0) csv_columns[file_name] = df.columns.tolist() tensor = torch.tensor(df.values, dtype=torch.float32) - # reshape to (scenarios, duration, features) + + # Reshape to (scenarios, duration, features) tensor = tensor.reshape( total_scenarios, total_duration, df.shape[1] ) - # select temporal subset + + # 
Select temporal subset tensor = tensor[:num_scenarios, :num_instants, :] data_tensors[file_name] = tensor @@ -281,7 +284,9 @@ def is_edge_var(var_name: str) -> bool: tensor = tensor[ :, [ - csv_columns[var_name].index(str(n)) + csv_columns[var_name].index( + str(id_node_map_pivot[n]) + ) for n in node_order ], ] From efb3d0aa79614bdb9752e5b47464e30b5573d74c Mon Sep 17 00:00:00 2001 From: LeoDiNino97 Date: Mon, 24 Nov 2025 14:48:11 +0100 Subject: [PATCH 06/13] Removed debugging stuff --- test/pipeline/test_pipeline.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/test/pipeline/test_pipeline.py b/test/pipeline/test_pipeline.py index 2c4f13a21..02b272388 100644 --- a/test/pipeline/test_pipeline.py +++ b/test/pipeline/test_pipeline.py @@ -33,9 +33,3 @@ def test_pipeline(self): ) run(cfg) print('Done!') - -# --- Run the pipeline --- -if __name__ == "__main__": - pipeline = TestPipeline() - pipeline.setup_method() - pipeline.test_pipeline() From 0828d8bbf3c64c57d4af471783dd46dc833be5f3 Mon Sep 17 00:00:00 2001 From: LeoDiNino97 Date: Mon, 24 Nov 2025 15:18:50 +0100 Subject: [PATCH 07/13] Bugs with directory location --- test/pipeline/test_pipeline.py | 8 +++++++- topobench/data/datasets/wdn_dataset.py | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/test/pipeline/test_pipeline.py b/test/pipeline/test_pipeline.py index 02b272388..413711f09 100644 --- a/test/pipeline/test_pipeline.py +++ b/test/pipeline/test_pipeline.py @@ -3,7 +3,7 @@ import hydra from test._utils.simplified_pipeline import run -DATASET = "graph/anytown" # ADD YOUR DATASET HERE +DATASET = "graph/modena" # ADD YOUR DATASET HERE MODELS = ["graph/gcn", ] # ADD ONE OR SEVERAL MODELS OF YOUR CHOICE HERE @@ -33,3 +33,9 @@ def test_pipeline(self): ) run(cfg) print('Done!') + +# --- Run the pipeline --- +if __name__ == "__main__": + pipeline = TestPipeline() + pipeline.setup_method() + pipeline.test_pipeline() diff --git a/topobench/data/datasets/wdn_dataset.py 
b/topobench/data/datasets/wdn_dataset.py index 2af7842de..ca38fca93 100644 --- a/topobench/data/datasets/wdn_dataset.py +++ b/topobench/data/datasets/wdn_dataset.py @@ -135,7 +135,7 @@ def download(self) -> None: extract_zip(path, self.raw_dir) # Delete zip file - os.unlink(path) + # os.unlink(path) # Remove unretained files retain_files = getattr( From e05c41af62458da2d8940eecdca23e027ad46764 Mon Sep 17 00:00:00 2001 From: LeoDiNino97 Date: Mon, 24 Nov 2025 15:19:20 +0100 Subject: [PATCH 08/13] Removing debugging again --- test/pipeline/test_pipeline.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/test/pipeline/test_pipeline.py b/test/pipeline/test_pipeline.py index 413711f09..039cf708a 100644 --- a/test/pipeline/test_pipeline.py +++ b/test/pipeline/test_pipeline.py @@ -3,7 +3,7 @@ import hydra from test._utils.simplified_pipeline import run -DATASET = "graph/modena" # ADD YOUR DATASET HERE +DATASET = "graph/anytoen" # ADD YOUR DATASET HERE MODELS = ["graph/gcn", ] # ADD ONE OR SEVERAL MODELS OF YOUR CHOICE HERE @@ -33,9 +33,3 @@ def test_pipeline(self): ) run(cfg) print('Done!') - -# --- Run the pipeline --- -if __name__ == "__main__": - pipeline = TestPipeline() - pipeline.setup_method() - pipeline.test_pipeline() From f664176892bd0cc761eef318d49dbf9626fa52e8 Mon Sep 17 00:00:00 2001 From: LeoDiNino97 Date: Mon, 24 Nov 2025 15:24:38 +0100 Subject: [PATCH 09/13] Corrected typos --- test/pipeline/test_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/pipeline/test_pipeline.py b/test/pipeline/test_pipeline.py index 039cf708a..02b272388 100644 --- a/test/pipeline/test_pipeline.py +++ b/test/pipeline/test_pipeline.py @@ -3,7 +3,7 @@ import hydra from test._utils.simplified_pipeline import run -DATASET = "graph/anytoen" # ADD YOUR DATASET HERE +DATASET = "graph/anytown" # ADD YOUR DATASET HERE MODELS = ["graph/gcn", ] # ADD ONE OR SEVERAL MODELS OF YOUR CHOICE HERE From 
b2f9cbdb817a06c106b28670d6f570df0526149b Mon Sep 17 00:00:00 2001 From: LeoDiNino97 Date: Mon, 24 Nov 2025 16:17:31 +0100 Subject: [PATCH 10/13] Corrected directory pointer, all tests passed locally --- topobench/data/datasets/wdn_dataset.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/topobench/data/datasets/wdn_dataset.py b/topobench/data/datasets/wdn_dataset.py index ca38fca93..24977852d 100644 --- a/topobench/data/datasets/wdn_dataset.py +++ b/topobench/data/datasets/wdn_dataset.py @@ -121,6 +121,7 @@ def download(self) -> None: f"URL or FILE_FORMAT not set for {self.parameters.data_name}" ) + # Download data from the source download_file_from_link( file_link=self.URL, path_to_save=self.raw_dir, @@ -132,10 +133,11 @@ def download(self) -> None: path = osp.join( self.raw_dir, f"{self.parameters.data_name}.{self.FILE_FORMAT}" ) + extract_zip(path, self.raw_dir) # Delete zip file - # os.unlink(path) + os.unlink(path) # Remove unretained files retain_files = getattr( From 5359465f5aef28dfd6068a80c166da6bec768b27 Mon Sep 17 00:00:00 2001 From: LeoDiNino97 Date: Mon, 24 Nov 2025 18:34:07 +0100 Subject: [PATCH 11/13] KY10's link is offline, so deleted it from config folder and loader --- configs/dataset/graph/ky10.yaml | 48 -------------------------- topobench/data/datasets/wdn_dataset.py | 7 ++-- 2 files changed, 4 insertions(+), 51 deletions(-) delete mode 100644 configs/dataset/graph/ky10.yaml diff --git a/configs/dataset/graph/ky10.yaml b/configs/dataset/graph/ky10.yaml deleted file mode 100644 index b79d7ab0e..000000000 --- a/configs/dataset/graph/ky10.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Dataset loader config -loader: - _target_: topobench.data.loaders.WDNDatasetLoader - parameters: - data_domain: graph - data_type: wdn_dataset - data_name: ky10 - data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} - regressors: ["pressure", "demand", "flowrate"] - target: ["head"] - temporal: 
False - num_scenarios: 1 - num_instants: 1 - -# Variables among which one can choose as regressors and target variables -# pressure NODE -# demand NODE -# flowrate EDGE -# velocity EDGE -# head NODE -# head_loss EDGE -# friction_factor EDGE -# Be careful when overriding task_level, num_features and num_edge_features - -# Dataset parameters -parameters: - task: regression - task_level: node - num_features: 2 - num_edge_features: 1 - num_classes: 1 - loss_type: mse - monitor_metric: mse - -# splits -split_params: - learning_setting: transductive - data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name} - data_seed: 0 - split_type: random - train_prop: 0.8 - k: 10 - -# Dataloader parameters -dataloader_params: - batch_size: 1 - num_workers: 1 - pin_memory: False diff --git a/topobench/data/datasets/wdn_dataset.py b/topobench/data/datasets/wdn_dataset.py index 24977852d..637a851f5 100644 --- a/topobench/data/datasets/wdn_dataset.py +++ b/topobench/data/datasets/wdn_dataset.py @@ -394,10 +394,11 @@ class KY8Dataset(WDNDataset): URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_ky8_20240524_1228_csvdir_20240527_1225.zip?download=1" -class KY10Dataset(WDNDataset): - """Dataset generated with the K10 WDN model.""" +# This went offline +# class KY10Dataset(WDNDataset): +# """Dataset generated with the K10 WDN model.""" - URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_ky10_20240524_1229_csvdir_20240527_1218.zip?download=1" +# URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_ky10_20240524_1229_csvdir_20240527_1218.zip?download=1" class LTownDataset(WDNDataset): From 1d567a16e99b0ff544934fc619ed0addb33c1c58 Mon Sep 17 00:00:00 2001 From: LeoDiNino97 Date: Mon, 24 Nov 2025 18:34:50 +0100 Subject: [PATCH 12/13] Corrected import in loader --- topobench/data/loaders/graph/wdn_dataset_loader.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/topobench/data/loaders/graph/wdn_dataset_loader.py 
b/topobench/data/loaders/graph/wdn_dataset_loader.py index b5e624c03..8a09b2cb0 100644 --- a/topobench/data/loaders/graph/wdn_dataset_loader.py +++ b/topobench/data/loaders/graph/wdn_dataset_loader.py @@ -16,7 +16,6 @@ KY1Dataset, KY6Dataset, KY8Dataset, - KY10Dataset, LTownDataset, ModenaDataset, ) @@ -51,7 +50,6 @@ class WDNDatasetLoader(AbstractLoader): "ky1": KY1Dataset, "ky6": KY6Dataset, "ky8": KY8Dataset, - "ky10": KY10Dataset, "ltown": LTownDataset, "modena": ModenaDataset, } From ba7a02b2f83875824e612965c5f03de6e246d176 Mon Sep 17 00:00:00 2001 From: LeoDiNino97 Date: Mon, 24 Nov 2025 21:48:10 +0100 Subject: [PATCH 13/13] Refactoring the subclasses into the main dataset class to avoid overhead in the number of configuration files: one is sufficient --- .../dataset/graph/{anytown.yaml => WDN.yaml} | 2 +- configs/dataset/graph/balerman.yaml | 48 ---------- configs/dataset/graph/ctown.yaml | 48 ---------- configs/dataset/graph/dtown.yaml | 48 ---------- configs/dataset/graph/exn.yaml | 48 ---------- configs/dataset/graph/ky1.yaml | 48 ---------- configs/dataset/graph/ky6.yaml | 48 ---------- configs/dataset/graph/ky8.yaml | 48 ---------- configs/dataset/graph/ltown.yaml | 48 ---------- configs/dataset/graph/modena.yaml | 48 ---------- test/pipeline/test_pipeline.py | 2 +- topobench/data/datasets/wdn_dataset.py | 90 +++++++++++-------- .../data/loaders/graph/wdn_dataset_loader.py | 47 ++++------ 13 files changed, 70 insertions(+), 503 deletions(-) rename configs/dataset/graph/{anytown.yaml => WDN.yaml} (92%) delete mode 100644 configs/dataset/graph/balerman.yaml delete mode 100644 configs/dataset/graph/ctown.yaml delete mode 100644 configs/dataset/graph/dtown.yaml delete mode 100644 configs/dataset/graph/exn.yaml delete mode 100644 configs/dataset/graph/ky1.yaml delete mode 100644 configs/dataset/graph/ky6.yaml delete mode 100644 configs/dataset/graph/ky8.yaml delete mode 100644 configs/dataset/graph/ltown.yaml delete mode 100644 
configs/dataset/graph/modena.yaml diff --git a/configs/dataset/graph/anytown.yaml b/configs/dataset/graph/WDN.yaml similarity index 92% rename from configs/dataset/graph/anytown.yaml rename to configs/dataset/graph/WDN.yaml index ffa79b4f4..90ea15511 100644 --- a/configs/dataset/graph/anytown.yaml +++ b/configs/dataset/graph/WDN.yaml @@ -4,7 +4,7 @@ loader: parameters: data_domain: graph data_type: wdn_dataset - data_name: anytown + data_name: anytown # ['balerman','ctown','dtown','exn','ky1','ky6','ky8','ltown','modena'] data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} regressors: ["pressure", "demand", "flowrate"] target: ["head"] diff --git a/configs/dataset/graph/balerman.yaml b/configs/dataset/graph/balerman.yaml deleted file mode 100644 index 42c7176c2..000000000 --- a/configs/dataset/graph/balerman.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Dataset loader config -loader: - _target_: topobench.data.loaders.WDNDatasetLoader - parameters: - data_domain: graph - data_type: wdn_dataset - data_name: balerman - data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} - regressors: ["pressure", "demand", "flowrate"] - target: ["head"] - temporal: False - num_scenarios: 1 - num_instants: 1 - -# Variables among which one can choose as regressors and target variables -# pressure NODE -# demand NODE -# flowrate EDGE -# velocity EDGE -# head NODE -# head_loss EDGE -# friction_factor EDGE -# Be careful when overriding task_level, num_features and num_edge_features - -# Dataset parameters -parameters: - task: regression - task_level: node - num_features: 2 - num_edge_features: 1 - num_classes: 1 - loss_type: mse - monitor_metric: mse - -# splits -split_params: - learning_setting: transductive - data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name} - data_seed: 0 - split_type: random - train_prop: 0.8 - k: 10 - -# Dataloader parameters 
-dataloader_params: - batch_size: 1 - num_workers: 1 - pin_memory: False diff --git a/configs/dataset/graph/ctown.yaml b/configs/dataset/graph/ctown.yaml deleted file mode 100644 index 18dfba744..000000000 --- a/configs/dataset/graph/ctown.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Dataset loader config -loader: - _target_: topobench.data.loaders.WDNDatasetLoader - parameters: - data_domain: graph - data_type: wdn_dataset - data_name: ctown - data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} - regressors: ["pressure", "demand", "flowrate"] - target: ["head"] - temporal: False - num_scenarios: 1 - num_instants: 1 - -# Variables among which one can choose as regressors and target variables -# pressure NODE -# demand NODE -# flowrate EDGE -# velocity EDGE -# head NODE -# head_loss EDGE -# friction_factor EDGE -# Be careful when overriding task_level, num_features and num_edge_features - -# Dataset parameters -parameters: - task: regression - task_level: node - num_features: 2 - num_edge_features: 1 - num_classes: 1 - loss_type: mse - monitor_metric: mse - -# splits -split_params: - learning_setting: transductive - data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name} - data_seed: 0 - split_type: random - train_prop: 0.8 - k: 10 - -# Dataloader parameters -dataloader_params: - batch_size: 1 - num_workers: 1 - pin_memory: False diff --git a/configs/dataset/graph/dtown.yaml b/configs/dataset/graph/dtown.yaml deleted file mode 100644 index ccf10db67..000000000 --- a/configs/dataset/graph/dtown.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Dataset loader config -loader: - _target_: topobench.data.loaders.WDNDatasetLoader - parameters: - data_domain: graph - data_type: wdn_dataset - data_name: dtown - data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} - regressors: ["pressure", "demand", "flowrate"] - target: ["head"] - temporal: False - 
num_scenarios: 1 - num_instants: 1 - -# Variables among which one can choose as regressors and target variables -# pressure NODE -# demand NODE -# flowrate EDGE -# velocity EDGE -# head NODE -# head_loss EDGE -# friction_factor EDGE -# Be careful when overriding task_level, num_features and num_edge_features - -# Dataset parameters -parameters: - task: regression - task_level: node - num_features: 2 - num_edge_features: 1 - num_classes: 1 - loss_type: mse - monitor_metric: mse - -# splits -split_params: - learning_setting: transductive - data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name} - data_seed: 0 - split_type: random - train_prop: 0.8 - k: 10 - -# Dataloader parameters -dataloader_params: - batch_size: 1 - num_workers: 1 - pin_memory: False diff --git a/configs/dataset/graph/exn.yaml b/configs/dataset/graph/exn.yaml deleted file mode 100644 index eb76e744f..000000000 --- a/configs/dataset/graph/exn.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Dataset loader config -loader: - _target_: topobench.data.loaders.WDNDatasetLoader - parameters: - data_domain: graph - data_type: wdn_dataset - data_name: exn - data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} - regressors: ["pressure", "demand", "flowrate"] - target: ["head"] - temporal: False - num_scenarios: 1 - num_instants: 1 - -# Variables among which one can choose as regressors and target variables -# pressure NODE -# demand NODE -# flowrate EDGE -# velocity EDGE -# head NODE -# head_loss EDGE -# friction_factor EDGE -# Be careful when overriding task_level, num_features and num_edge_features - -# Dataset parameters -parameters: - task: regression - task_level: node - num_features: 2 - num_edge_features: 1 - num_classes: 1 - loss_type: mse - monitor_metric: mse - -# splits -split_params: - learning_setting: transductive - data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name} - data_seed: 0 - 
split_type: random - train_prop: 0.8 - k: 10 - -# Dataloader parameters -dataloader_params: - batch_size: 1 - num_workers: 1 - pin_memory: False diff --git a/configs/dataset/graph/ky1.yaml b/configs/dataset/graph/ky1.yaml deleted file mode 100644 index 0b862599a..000000000 --- a/configs/dataset/graph/ky1.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Dataset loader config -loader: - _target_: topobench.data.loaders.WDNDatasetLoader - parameters: - data_domain: graph - data_type: wdn_dataset - data_name: ky1 - data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} - regressors: ["pressure", "demand", "flowrate"] - target: ["head"] - temporal: False - num_scenarios: 1 - num_instants: 1 - -# Variables among which one can choose as regressors and target variables -# pressure NODE -# demand NODE -# flowrate EDGE -# velocity EDGE -# head NODE -# head_loss EDGE -# friction_factor EDGE -# Be careful when overriding task_level, num_features and num_edge_features - -# Dataset parameters -parameters: - task: regression - task_level: node - num_features: 2 - num_edge_features: 1 - num_classes: 1 - loss_type: mse - monitor_metric: mse - -# splits -split_params: - learning_setting: transductive - data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name} - data_seed: 0 - split_type: random - train_prop: 0.8 - k: 10 - -# Dataloader parameters -dataloader_params: - batch_size: 1 - num_workers: 1 - pin_memory: False diff --git a/configs/dataset/graph/ky6.yaml b/configs/dataset/graph/ky6.yaml deleted file mode 100644 index 1e28f67a7..000000000 --- a/configs/dataset/graph/ky6.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Dataset loader config -loader: - _target_: topobench.data.loaders.WDNDatasetLoader - parameters: - data_domain: graph - data_type: wdn_dataset - data_name: ky6 - data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} - regressors: ["pressure", 
"demand", "flowrate"] - target: ["head"] - temporal: False - num_scenarios: 1 - num_instants: 1 - -# Variables among which one can choose as regressors and target variables -# pressure NODE -# demand NODE -# flowrate EDGE -# velocity EDGE -# head NODE -# head_loss EDGE -# friction_factor EDGE -# Be careful when overriding task_level, num_features and num_edge_features - -# Dataset parameters -parameters: - task: regression - task_level: node - num_features: 2 - num_edge_features: 1 - num_classes: 1 - loss_type: mse - monitor_metric: mse - -# splits -split_params: - learning_setting: transductive - data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name} - data_seed: 0 - split_type: random - train_prop: 0.8 - k: 10 - -# Dataloader parameters -dataloader_params: - batch_size: 1 - num_workers: 1 - pin_memory: False diff --git a/configs/dataset/graph/ky8.yaml b/configs/dataset/graph/ky8.yaml deleted file mode 100644 index e218f361b..000000000 --- a/configs/dataset/graph/ky8.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Dataset loader config -loader: - _target_: topobench.data.loaders.WDNDatasetLoader - parameters: - data_domain: graph - data_type: wdn_dataset - data_name: ky8 - data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} - regressors: ["pressure", "demand", "flowrate"] - target: ["head"] - temporal: False - num_scenarios: 1 - num_instants: 1 - -# Variables among which one can choose as regressors and target variables -# pressure NODE -# demand NODE -# flowrate EDGE -# velocity EDGE -# head NODE -# head_loss EDGE -# friction_factor EDGE -# Be careful when overriding task_level, num_features and num_edge_features - -# Dataset parameters -parameters: - task: regression - task_level: node - num_features: 2 - num_edge_features: 1 - num_classes: 1 - loss_type: mse - monitor_metric: mse - -# splits -split_params: - learning_setting: transductive - data_split_dir: 
${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name} - data_seed: 0 - split_type: random - train_prop: 0.8 - k: 10 - -# Dataloader parameters -dataloader_params: - batch_size: 1 - num_workers: 1 - pin_memory: False diff --git a/configs/dataset/graph/ltown.yaml b/configs/dataset/graph/ltown.yaml deleted file mode 100644 index 45e94b815..000000000 --- a/configs/dataset/graph/ltown.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Dataset loader config -loader: - _target_: topobench.data.loaders.WDNDatasetLoader - parameters: - data_domain: graph - data_type: wdn_dataset - data_name: ltown - data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} - regressors: ["pressure", "demand", "flowrate"] - target: ["head"] - temporal: False - num_scenarios: 1 - num_instants: 1 - -# Variables among which one can choose as regressors and target variables -# pressure NODE -# demand NODE -# flowrate EDGE -# velocity EDGE -# head NODE -# head_loss EDGE -# friction_factor EDGE -# Be careful when overriding task_level, num_features and num_edge_features - -# Dataset parameters -parameters: - task: regression - task_level: node - num_features: 2 - num_edge_features: 1 - num_classes: 1 - loss_type: mse - monitor_metric: mse - -# splits -split_params: - learning_setting: transductive - data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name} - data_seed: 0 - split_type: random - train_prop: 0.8 - k: 10 - -# Dataloader parameters -dataloader_params: - batch_size: 1 - num_workers: 1 - pin_memory: False diff --git a/configs/dataset/graph/modena.yaml b/configs/dataset/graph/modena.yaml deleted file mode 100644 index 348ad66b9..000000000 --- a/configs/dataset/graph/modena.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Dataset loader config -loader: - _target_: topobench.data.loaders.WDNDatasetLoader - parameters: - data_domain: graph - data_type: wdn_dataset - data_name: modena - data_dir: 
${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} - regressors: ["pressure", "demand", "flowrate"] - target: ["head"] - temporal: False - num_scenarios: 1 - num_instants: 1 - -# Variables among which one can choose as regressors and target variables -# pressure NODE -# demand NODE -# flowrate EDGE -# velocity EDGE -# head NODE -# head_loss EDGE -# friction_factor EDGE -# Be careful when overriding task_level, num_features and num_edge_features - -# Dataset parameters -parameters: - task: regression - task_level: node - num_features: 2 - num_edge_features: 1 - num_classes: 1 - loss_type: mse - monitor_metric: mse - -# splits -split_params: - learning_setting: transductive - data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name} - data_seed: 0 - split_type: random - train_prop: 0.8 - k: 10 - -# Dataloader parameters -dataloader_params: - batch_size: 1 - num_workers: 1 - pin_memory: False diff --git a/test/pipeline/test_pipeline.py b/test/pipeline/test_pipeline.py index 02b272388..2e1044d99 100644 --- a/test/pipeline/test_pipeline.py +++ b/test/pipeline/test_pipeline.py @@ -3,7 +3,7 @@ import hydra from test._utils.simplified_pipeline import run -DATASET = "graph/anytown" # ADD YOUR DATASET HERE +DATASET = "graph/WDN" # ADD YOUR DATASET HERE MODELS = ["graph/gcn", ] # ADD ONE OR SEVERAL MODELS OF YOUR CHOICE HERE diff --git a/topobench/data/datasets/wdn_dataset.py b/topobench/data/datasets/wdn_dataset.py index 637a851f5..b66ecd30e 100644 --- a/topobench/data/datasets/wdn_dataset.py +++ b/topobench/data/datasets/wdn_dataset.py @@ -28,12 +28,24 @@ class WDNDataset(InMemoryDataset): Attributes ---------- - URLS (dict): Name of the specific dataset to be istantiated. + URLs (dict): Name of the specific dataset to be istantiated. FILE_FORMAT (dict): File format of the dataset. RAW_FILE_NAMES (dict): List of file names of the dataset. 
""" - URL: ClassVar[str] = None + URLs: ClassVar = { + "anytown": "https://zenodo.org/records/11353195/files/simgen_Anytown_20240524_1202_csvdir_20240527_1205.zip?download=1", + "balerman": "https://zenodo.org/records/11353195/files/simgen_balerman_20240524_1233_csvdir_20240527_1205.zip?download=1", + "ctown": "https://zenodo.org/records/11353195/files/simgen_ctown_20240524_1231_csvdir_20240527_1208.zip?download=1", + "dtown": "https://zenodo.org/records/11353195/files/simgen_d-town_20240525_1755_csvdir_20240527_1210.zip?download=1", + "exn": "https://zenodo.org/records/11353195/files/simgen_EXN_20240525_0928_csvdir_20240527_1237.zip?download=1", + "ky1": "https://zenodo.org/records/11353195/files/simgen_ky1_20240524_1229_csvdir_20240527_1218.zip?download=1", + "ky6": "https://zenodo.org/records/11353195/files/simgen_ky6_20240524_1228_csvdir_20240527_1223.zip?download=1", + "ky8": "https://zenodo.org/records/11353195/files/simgen_ky8_20240524_1228_csvdir_20240527_1225.zip?download=1", + "ltown": "https://zenodo.org/records/11353195/files/simgen_L-TOWN_Real_20240524_1228_csvdir_20240527_1232.zip?download=1", + "modena": "https://zenodo.org/records/11353195/files/simgen_moderna_20240524_1230_csvdir_20240527_1212.zip?download=1", + } + FILE_FORMAT: ClassVar[str] = "zip" def __init__(self, root: str, parameters: DictConfig) -> None: @@ -116,14 +128,14 @@ def download(self) -> None: Raises: FileNotFoundError: If the dataset URL is not found. 
""" - if self.URL is None or self.FILE_FORMAT is None: + if self.parameters.data_name not in self.URLs: raise FileNotFoundError( - f"URL or FILE_FORMAT not set for {self.parameters.data_name}" + f"URL not set for {self.parameters.data_name}" ) # Download data from the source download_file_from_link( - file_link=self.URL, + file_link=self.URLs[self.parameters.data_name], path_to_save=self.raw_dir, dataset_name=self.parameters.data_name, file_format=self.FILE_FORMAT, @@ -346,68 +358,68 @@ def is_edge_var(var_name: str) -> bool: # Subclasses for each dataset # -class AnytownDataset(WDNDataset): - """Dataset generated with the Anytown WDN model.""" +# class AnytownDataset(WDNDataset): +# """Dataset generated with the Anytown WDN model.""" - URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_Anytown_20240524_1202_csvdir_20240527_1205.zip?download=1" +# URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_Anytown_20240524_1202_csvdir_20240527_1205.zip?download=1" -class BalermanDataset(WDNDataset): - """Dataset generated with the Balerma WDN model.""" +# class BalermanDataset(WDNDataset): +# """Dataset generated with the Balerma WDN model.""" - URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_balerman_20240524_1233_csvdir_20240527_1205.zip?download=1" +# URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_balerman_20240524_1233_csvdir_20240527_1205.zip?download=1" -class CTownDataset(WDNDataset): - """Dataset generated with the C-Town WDN model.""" +# class CTownDataset(WDNDataset): +# """Dataset generated with the C-Town WDN model.""" - URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_ctown_20240524_1231_csvdir_20240527_1208.zip?download=1" +# URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_ctown_20240524_1231_csvdir_20240527_1208.zip?download=1" -class DTownDataset(WDNDataset): - """Dataset generated with the D-Town WDN model.""" +# class DTownDataset(WDNDataset): +# 
"""Dataset generated with the D-Town WDN model.""" - URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_d-town_20240525_1755_csvdir_20240527_1210.zip?download=1" +# URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_d-town_20240525_1755_csvdir_20240527_1210.zip?download=1" -class EXNDataset(WDNDataset): - """Dataset generated with the EXN WDN model.""" +# class EXNDataset(WDNDataset): +# """Dataset generated with the EXN WDN model.""" - URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_EXN_20240525_0928_csvdir_20240527_1237.zip?download=1" +# URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_EXN_20240525_0928_csvdir_20240527_1237.zip?download=1" -class KY1Dataset(WDNDataset): - """Dataset generated with the K1 WDN model.""" +# class KY1Dataset(WDNDataset): +# """Dataset generated with the K1 WDN model.""" - URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_ky1_20240524_1229_csvdir_20240527_1218.zip?download=1" +# URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_ky1_20240524_1229_csvdir_20240527_1218.zip?download=1" -class KY6Dataset(WDNDataset): - """Dataset generated with the K6 WDN model.""" +# class KY6Dataset(WDNDataset): +# """Dataset generated with the K6 WDN model.""" - URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_ky6_20240524_1228_csvdir_20240527_1223.zip?download=1" +# URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_ky6_20240524_1228_csvdir_20240527_1223.zip?download=1" -class KY8Dataset(WDNDataset): - """Dataset generated with the K8 WDN model.""" +# class KY8Dataset(WDNDataset): +# """Dataset generated with the K8 WDN model.""" - URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_ky8_20240524_1228_csvdir_20240527_1225.zip?download=1" +# URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_ky8_20240524_1228_csvdir_20240527_1225.zip?download=1" -# This went offline -# class 
KY10Dataset(WDNDataset): -# """Dataset generated with the K10 WDN model.""" +# # This went offline +# # class KY10Dataset(WDNDataset): +# # """Dataset generated with the K10 WDN model.""" -# URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_ky10_20240524_1229_csvdir_20240527_1218.zip?download=1" +# # URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_ky10_20240524_1229_csvdir_20240527_1218.zip?download=1" -class LTownDataset(WDNDataset): - """Dataset generated with the L-Town WDN model.""" +# class LTownDataset(WDNDataset): +# """Dataset generated with the L-Town WDN model.""" - URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_L-TOWN_Real_20240524_1228_csvdir_20240527_1232.zip?download=1" +# URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_L-TOWN_Real_20240524_1228_csvdir_20240527_1232.zip?download=1" -class ModenaDataset(WDNDataset): - """Dataset generated with the Modena WDN model.""" +# class ModenaDataset(WDNDataset): +# """Dataset generated with the Modena WDN model.""" - URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_moderna_20240524_1230_csvdir_20240527_1212.zip?download=1" +# URL: ClassVar = "https://zenodo.org/records/11353195/files/simgen_moderna_20240524_1230_csvdir_20240527_1212.zip?download=1" diff --git a/topobench/data/loaders/graph/wdn_dataset_loader.py b/topobench/data/loaders/graph/wdn_dataset_loader.py index 8a09b2cb0..fc7e1a78b 100644 --- a/topobench/data/loaders/graph/wdn_dataset_loader.py +++ b/topobench/data/loaders/graph/wdn_dataset_loader.py @@ -7,18 +7,7 @@ from omegaconf import DictConfig from torch_geometric.data import Dataset -from topobench.data.datasets.wdn_dataset import ( - AnytownDataset, - BalermanDataset, - CTownDataset, - DTownDataset, - EXNDataset, - KY1Dataset, - KY6Dataset, - KY8Dataset, - LTownDataset, - ModenaDataset, -) +from topobench.data.datasets.wdn_dataset import WDNDataset from topobench.data.loaders.base import AbstractLoader @@ 
-41,18 +30,18 @@ class WDNDatasetLoader(AbstractLoader):
 
     # This map routes a data_name to a class of WDNDataset
-    _DATASETS: dict[str, type[Any]] = {
-        "anytown": AnytownDataset,
-        "balerman": BalermanDataset,
-        "ctown": CTownDataset,
-        "dtown": DTownDataset,
-        "exn": EXNDataset,
-        "ky1": KY1Dataset,
-        "ky6": KY6Dataset,
-        "ky8": KY8Dataset,
-        "ltown": LTownDataset,
-        "modena": ModenaDataset,
-    }
+    _DATASETS: list[str] = [
+        "anytown",
+        "balerman",
+        "ctown",
+        "dtown",
+        "exn",
+        "ky1",
+        "ky6",
+        "ky8",
+        "ltown",
+        "modena",
+    ]
 
     def __init__(self, parameters: DictConfig) -> None:
         super().__init__(parameters)
@@ -73,15 +62,13 @@ def load_dataset(self) -> Dataset:
         """
         name = self.parameters.data_name.lower()
 
-        try:
-            dataset_cls = type(self)._DATASETS[name]
-        except KeyError as err:
+        if name not in type(self)._DATASETS:
             raise RuntimeError(
                 f"Unknown dataset '{name}'. "
-                f"Available datasets: {list(type(self)._DATASETS.keys())}"
-            ) from err
+                f"Available datasets: {type(self)._DATASETS}"
+            )
 
-        return dataset_cls(
+        return WDNDataset(
             root=str(self.root_data_dir),
             parameters=self.parameters,
         )