Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions configs/dataset/hypergraph/conjugated_ocelotv1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Dataset loader config — OCELOTv1 conjugated-molecule dataset on the hypergraph domain.
loader:
  _target_: topobench.data.loaders.ConjugatedMoleculeDatasetLoader
  parameters:
    data_domain: hypergraph
    data_type: conjugated_molecules
    data_name: OCELOTv1
    # ${...} interpolations resolve against the composed Hydra config at runtime.
    data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type}
    target_col: ${dataset.parameters.target_col}
    task: ${dataset.parameters.task}

# Dataset parameters (single-target graph-level regression)
parameters:
  num_features: 9 # OGB atom features
  num_classes: 1 # Single target regression
  target_col: 0 # Use the first target column
  task: regression
  loss_type: mse
  monitor_metric: mae
  task_level: graph

# Split settings
split_params:
  learning_setting: inductive
  data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name}
  data_seed: 0
  split_type: random # OCELOTv1 doesn't have predefined splits
  k: 10 # for k-fold Cross-Validation
  train_prop: 0.8 # for random strategy splitting

# Dataloader parameters
dataloader_params:
  batch_size: 64
  num_workers: 4
  pin_memory: True
36 changes: 36 additions & 0 deletions configs/dataset/hypergraph/conjugated_opv_train.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Dataset loader config - OPV train split (hypergraph domain).
loader:
  _target_: topobench.data.loaders.ConjugatedMoleculeDatasetLoader
  parameters:
    data_domain: hypergraph
    data_type: conjugated_molecules
    data_name: OPV
    split: train # Options: train, valid, test
    # NOTE(review): this loader-level `task` ("default"/"polymer") selects a data
    # subset and is distinct from `dataset.parameters.task` ("regression") below —
    # confirm the naming is intentional to avoid confusion.
    task: default # Options: default, polymer
    target_col: ${dataset.parameters.target_col}
    data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type}

# Dataset parameters (single-target graph-level regression)
parameters:
  num_features: 9 # OGB atom features
  num_classes: 1
  target_col: 0
  task: regression
  loss_type: mse
  monitor_metric: mae
  task_level: graph

# Split settings
split_params:
  learning_setting: inductive
  data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name}
  data_seed: 0
  split_type: fixed # OPV has predefined train/valid/test splits
  k: 10 # not used for fixed splits
  train_prop: 0.8 # not used for fixed splits

# Dataloader parameters
dataloader_params:
  batch_size: 64
  num_workers: 4
  pin_memory: True
36 changes: 36 additions & 0 deletions configs/dataset/hypergraph/conjugated_opv_train_polymer.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Dataset loader config - OPV train split, polymer task (hypergraph domain).
loader:
  _target_: topobench.data.loaders.ConjugatedMoleculeDatasetLoader
  parameters:
    data_domain: hypergraph
    data_type: conjugated_molecules
    data_name: OPV
    split: train
    task: polymer # Filters molecules with complete extrapolated properties
    target_col: ${dataset.parameters.target_col}
    data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type}

# Dataset parameters (single-target graph-level regression)
parameters:
  num_features: 9 # OGB atom features
  num_classes: 1
  target_col: 0
  task: regression
  loss_type: mse
  monitor_metric: mae
  task_level: graph

# Split settings
split_params:
  learning_setting: inductive
  data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name}
  data_seed: 0
  split_type: fixed # OPV has predefined train/valid/test splits
  k: 10 # not used for fixed splits
  train_prop: 0.8 # not used for fixed splits

# Dataloader parameters
dataloader_params:
  batch_size: 64
  num_workers: 4
  pin_memory: True
32 changes: 32 additions & 0 deletions configs/dataset/hypergraph/conjugated_pcqm4mv2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Dataset loader config - PCQM4Mv2 (hypergraph domain).
# NOTE(review): unlike the sibling conjugated_* configs, the loader here receives
# no `target_col`/`task` parameters — presumably because PCQM4Mv2 has the single
# fixed homolumogap target; confirm the loader's defaults match that intent.
loader:
  _target_: topobench.data.loaders.ConjugatedMoleculeDatasetLoader
  parameters:
    data_domain: hypergraph
    data_type: conjugated_molecules
    data_name: PCQM4MV2
    data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type}

# Dataset parameters (single-target graph-level regression)
parameters:
  num_features: 9 # OGB atom features
  num_classes: 1 # Single target: homolumogap
  task: regression
  loss_type: mse
  monitor_metric: mae
  task_level: graph

# Split settings
split_params:
  learning_setting: inductive
  data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name}
  data_seed: 0
  split_type: fixed # PCQM4Mv2 has predefined splits
  k: 10 # not used for fixed splits
  train_prop: 0.8 # not used for fixed splits

# Dataloader parameters
dataloader_params:
  batch_size: 64
  num_workers: 4
  pin_memory: True
102 changes: 41 additions & 61 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ build-backend = "setuptools.build_meta"
name = "TopoBench"
dynamic = ["version"]
authors = [
{name = "Topological Intelligence Team Authors", email = "tlscabinet@gmail.com"}
{ name = "Topological Intelligence Team Authors", email = "tlscabinet@gmail.com" },
]
readme = "README.md"
description = "Topological Deep Learning"
license = {file = "LICENSE.txt"}
license = { file = "LICENSE.txt" }
classifiers = [
"License :: OSI Approved :: MIT License",
"Development Status :: 4 - Beta",
Expand All @@ -21,10 +21,10 @@ classifiers = [
"Natural Language :: English",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11"
"Programming Language :: Python :: 3.11",
]
requires-python = ">= 3.10"
dependencies=[
requires-python = "== 3.11.3" # NOTE(review): exact patch-version pin conflicts with the 3.10/3.11 classifiers above and black/ruff target-version py310 — consider ">= 3.10" or at most "~= 3.11"
dependencies = [
"tqdm",
"charset-normalizer",
"numpy",
Expand Down Expand Up @@ -54,6 +54,7 @@ dependencies=[
"topomodelx @ git+https://github.com/pyt-team/TopoModelX.git",
"toponetx @ git+https://github.com/pyt-team/TopoNetX.git",
"lightning==2.4.0",
"rdkit",
]

[project.optional-dependencies]
Expand All @@ -65,30 +66,20 @@ doc = [
"sphinx",
"sphinx_gallery",
"pydata-sphinx-theme",
"myst_parser"
]
lint = [
"pre-commit",
"ruff"
]
test = [
"pytest",
"pytest-cov",
"coverage",
"jupyter",
"mypy",
"pytest-mock"
"myst_parser",
]
lint = ["pre-commit", "ruff"]
test = ["pytest", "pytest-cov", "coverage", "jupyter", "mypy", "pytest-mock"]

dev = ["TopoBench[test, lint]"]
all = ["TopoBench[dev, doc]"]

[project.urls]
homepage="https://geometric-intelligence.github.io/topobench/index.html"
repository="https://github.com/geometric-intelligence/TopoBench"
homepage = "https://geometric-intelligence.github.io/topobench/index.html"
repository = "https://github.com/geometric-intelligence/TopoBench"

[tool.black]
line-length = 79 # PEP 8 standard for maximum line length
line-length = 79 # PEP 8 standard for maximum line length
target-version = ['py310']

[tool.docformatter]
Expand All @@ -99,35 +90,35 @@ wrap-descriptions = 79
target-version = "py310"
#extend-include = ["*.ipynb"]
extend-exclude = ["test", "tutorials", "notebooks"]
line-length = 79 # PEP 8 standard for maximum line length
line-length = 79 # PEP 8 standard for maximum line length

[tool.ruff.format]
docstring-code-format = false

[tool.ruff.lint]
select = [
"F", # pyflakes errors
"E", # code style
"W", # warnings
"I", # import order
"UP", # pyupgrade rules
"B", # bugbear rules
"PIE", # pie rules
"Q", # quote rules
"RET", # return rules
"SIM", # code simplifications
"NPY", # numpy rules
"F", # pyflakes errors
"E", # code style
"W", # warnings
"I", # import order
"UP", # pyupgrade rules
"B", # bugbear rules
"PIE", # pie rules
"Q", # quote rules
"RET", # return rules
"SIM", # code simplifications
"NPY", # numpy rules
"PERF", # performance rules
]
fixable = ["ALL"]
ignore = [
"E501", # line too long
"RET504", # Unnecessary assignment before return
"RET505", # Unnecessary `elif` after `return` statement
"NPY002", # Replace legacy `np.random.seed` call with `np.random.Generator`
"UP038", # Use `X | Y` in `isinstance` call instead of `(X, Y)` -- not compatible with python 3.9 (even with __future__ import)
"W293", # Does not allow to have empty lines in multiline comments
"PERF203", # [TODO: fix all such issues] `try`-`except` within a loop incurs performance overhead
"E501", # line too long
"RET504", # Unnecessary assignment before return
"RET505", # Unnecessary `elif` after `return` statement
"NPY002", # Replace legacy `np.random.seed` call with `np.random.Generator`
"UP038", # Use `X | Y` in `isinstance` call instead of `(X, Y)` -- not compatible with python 3.9 (even with __future__ import)
"W293", # Does not allow to have empty lines in multiline comments
"PERF203", # [TODO: fix all such issues] `try`-`except` within a loop incurs performance overhead
]

[tool.ruff.lint.pydocstyle]
Expand All @@ -138,13 +129,10 @@ convention = "numpy"
"tests/*" = ["D"]

[tool.setuptools.dynamic]
version = {attr = "topobench.__version__"}
version = { attr = "topobench.__version__" }

[tool.setuptools.packages.find]
include = [
"topobench",
"topobench.*"
]
include = ["topobench", "topobench.*"]

[tool.mypy]
warn_redundant_casts = true
Expand All @@ -155,26 +143,18 @@ plugins = "numpy.typing.mypy_plugin"

[[tool.mypy.overrides]]
module = [
"torch_cluster.*","networkx.*","scipy.spatial","scipy.sparse","toponetx.classes.simplicial_complex"
"torch_cluster.*",
"networkx.*",
"scipy.spatial",
"scipy.sparse",
"toponetx.classes.simplicial_complex",
]
ignore_missing_imports = true

[tool.pytest.ini_options]
addopts = "--capture=no"
pythonpath = [
"."
]
pythonpath = ["."]

[tool.numpydoc_validation]
checks = [
"all",
"GL01",
"ES01",
"EX01",
"SA01"
]
exclude = [
'\.undocumented_method$',
'\.__init__$',
'\.__repr__$',
]
checks = ["all", "GL01", "ES01", "EX01", "SA01"]
exclude = ['\.undocumented_method$', '\.__init__$', '\.__repr__$']
2 changes: 1 addition & 1 deletion test/data/load/test_datasetloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def _gather_config_files(self, base_dir: Path) -> List[str]:
}

# Below the datasets that takes quite some time to load and process
self.long_running_datasets = {"mantra_name.yaml", "mantra_orientation.yaml", "mantra_genus.yaml", "mantra_betti_numbers.yaml"}
self.long_running_datasets = {"mantra_name.yaml", "mantra_orientation.yaml", "mantra_genus.yaml", "mantra_betti_numbers.yaml", "conjugated_pcqm4mv2.yaml"}


for dir_path in config_base_dir.iterdir():
Expand Down
8 changes: 5 additions & 3 deletions test/pipeline/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
from test._utils.simplified_pipeline import run


DATASET = "graph/MUTAG" # ADD YOUR DATASET HERE
MODELS = ["graph/gcn", "cell/topotune", "simplicial/topotune"] # ADD ONE OR SEVERAL MODELS OF YOUR CHOICE HERE
DATASET = "hypergraph/conjugated_ocelotv1"
MODELS = ["hypergraph/edgnn"]


class TestPipeline:
Expand All @@ -23,7 +23,7 @@ def test_pipeline(self):
config_name="run.yaml",
overrides=[
f"model={MODEL}",
f"dataset={DATASET}", # IF YOU IMPLEMENT A LARGE DATASET WITH AN OPTION TO USE A SLICE OF IT, ADD BELOW THE CORRESPONDING OPTION
f"dataset={DATASET}",
"trainer.max_epochs=2",
"trainer.min_epochs=1",
"trainer.check_val_every_n_epoch=1",
Expand All @@ -32,4 +32,6 @@ def test_pipeline(self):
],
return_hydra_config=True
)

print(cfg)
run(cfg)
Loading
Loading