Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions configs/dataset/hypergraph/conjugated_ocelotv1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Dataset loader config — OCELOTv1 conjugated-molecule dataset on the hypergraph domain.
loader:
  _target_: topobench.data.loaders.ConjugatedMoleculeDatasetLoader
  parameters:
    data_domain: hypergraph
    data_type: conjugated_molecules
    data_name: OCELOTv1
    # ${...} interpolations resolve against the composed Hydra config at runtime.
    data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type}
    target_col: ${dataset.parameters.target_col}
    task: ${dataset.parameters.task}

# Dataset parameters (single-target graph-level regression)
parameters:
  num_features: 9 # OGB atom features
  num_classes: 1 # Single target regression
  target_col: 0 # Use the first target column
  task: regression
  loss_type: mse
  monitor_metric: mae
  task_level: graph

# Split settings
split_params:
  learning_setting: inductive
  data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name}
  data_seed: 0
  split_type: random # OCELOTv1 doesn't have predefined splits
  k: 10 # for k-fold Cross-Validation
  train_prop: 0.8 # for random strategy splitting

# Dataloader parameters
dataloader_params:
  batch_size: 64
  num_workers: 4
  pin_memory: True
36 changes: 36 additions & 0 deletions configs/dataset/hypergraph/conjugated_opv_train.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Dataset loader config - OPV train split (hypergraph domain).
loader:
  _target_: topobench.data.loaders.ConjugatedMoleculeDatasetLoader
  parameters:
    data_domain: hypergraph
    data_type: conjugated_molecules
    data_name: OPV
    split: train # Options: train, valid, test
    # NOTE(review): this loader-level `task` ("default"/"polymer") selects a data
    # subset and is distinct from `dataset.parameters.task` ("regression") below —
    # confirm the naming is intentional to avoid confusion.
    task: default # Options: default, polymer
    target_col: ${dataset.parameters.target_col}
    data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type}

# Dataset parameters (single-target graph-level regression)
parameters:
  num_features: 9 # OGB atom features
  num_classes: 1
  target_col: 0
  task: regression
  loss_type: mse
  monitor_metric: mae
  task_level: graph

# Split settings
split_params:
  learning_setting: inductive
  data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name}
  data_seed: 0
  split_type: fixed # OPV has predefined train/valid/test splits
  k: 10 # not used for fixed splits
  train_prop: 0.8 # not used for fixed splits

# Dataloader parameters
dataloader_params:
  batch_size: 64
  num_workers: 4
  pin_memory: True
36 changes: 36 additions & 0 deletions configs/dataset/hypergraph/conjugated_opv_train_polymer.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Dataset loader config - OPV train split, polymer task (hypergraph domain).
loader:
  _target_: topobench.data.loaders.ConjugatedMoleculeDatasetLoader
  parameters:
    data_domain: hypergraph
    data_type: conjugated_molecules
    data_name: OPV
    split: train
    task: polymer # Filters molecules with complete extrapolated properties
    target_col: ${dataset.parameters.target_col}
    data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type}

# Dataset parameters (single-target graph-level regression)
parameters:
  num_features: 9 # OGB atom features
  num_classes: 1
  target_col: 0
  task: regression
  loss_type: mse
  monitor_metric: mae
  task_level: graph

# Split settings
split_params:
  learning_setting: inductive
  data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name}
  data_seed: 0
  split_type: fixed # OPV has predefined train/valid/test splits
  k: 10 # not used for fixed splits
  train_prop: 0.8 # not used for fixed splits

# Dataloader parameters
dataloader_params:
  batch_size: 64
  num_workers: 4
  pin_memory: True
32 changes: 32 additions & 0 deletions configs/dataset/hypergraph/conjugated_pcqm4mv2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Dataset loader config - PCQM4Mv2 (hypergraph domain).
# NOTE(review): unlike the sibling conjugated_* configs, the loader here receives
# no `target_col`/`task` parameters — presumably because PCQM4Mv2 has the single
# fixed homolumogap target; confirm the loader's defaults match that intent.
loader:
  _target_: topobench.data.loaders.ConjugatedMoleculeDatasetLoader
  parameters:
    data_domain: hypergraph
    data_type: conjugated_molecules
    data_name: PCQM4MV2
    data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type}

# Dataset parameters (single-target graph-level regression)
parameters:
  num_features: 9 # OGB atom features
  num_classes: 1 # Single target: homolumogap
  task: regression
  loss_type: mse
  monitor_metric: mae
  task_level: graph

# Split settings
split_params:
  learning_setting: inductive
  data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name}
  data_seed: 0
  split_type: fixed # PCQM4Mv2 has predefined splits
  k: 10 # not used for fixed splits
  train_prop: 0.8 # not used for fixed splits

# Dataloader parameters
dataloader_params:
  batch_size: 64
  num_workers: 4
  pin_memory: True
102 changes: 41 additions & 61 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ build-backend = "setuptools.build_meta"
name = "TopoBench"
dynamic = ["version"]
authors = [
{name = "Topological Intelligence Team Authors", email = "tlscabinet@gmail.com"}
{ name = "Topological Intelligence Team Authors", email = "tlscabinet@gmail.com" },
]
readme = "README.md"
description = "Topological Deep Learning"
license = {file = "LICENSE.txt"}
license = { file = "LICENSE.txt" }
classifiers = [
"License :: OSI Approved :: MIT License",
"Development Status :: 4 - Beta",
Expand All @@ -21,10 +21,10 @@ classifiers = [
"Natural Language :: English",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11"
"Programming Language :: Python :: 3.11",
]
requires-python = ">= 3.10"
dependencies=[
requires-python = "== 3.11.3" # NOTE(review): exact patch-version pin conflicts with the 3.10/3.11 classifiers above and black/ruff target-version py310 — consider ">= 3.10" or at most "~= 3.11"
dependencies = [
"tqdm",
"charset-normalizer",
"numpy",
Expand Down Expand Up @@ -54,6 +54,7 @@ dependencies=[
"topomodelx @ git+https://github.com/pyt-team/TopoModelX.git",
"toponetx @ git+https://github.com/pyt-team/TopoNetX.git",
"lightning==2.4.0",
"rdkit",
]

[project.optional-dependencies]
Expand All @@ -65,30 +66,20 @@ doc = [
"sphinx",
"sphinx_gallery",
"pydata-sphinx-theme",
"myst_parser"
]
lint = [
"pre-commit",
"ruff"
]
test = [
"pytest",
"pytest-cov",
"coverage",
"jupyter",
"mypy",
"pytest-mock"
"myst_parser",
]
lint = ["pre-commit", "ruff"]
test = ["pytest", "pytest-cov", "coverage", "jupyter", "mypy", "pytest-mock"]

dev = ["TopoBench[test, lint]"]
all = ["TopoBench[dev, doc]"]

[project.urls]
homepage="https://geometric-intelligence.github.io/topobench/index.html"
repository="https://github.com/geometric-intelligence/TopoBench"
homepage = "https://geometric-intelligence.github.io/topobench/index.html"
repository = "https://github.com/geometric-intelligence/TopoBench"

[tool.black]
line-length = 79 # PEP 8 standard for maximum line length
line-length = 79 # PEP 8 standard for maximum line length
target-version = ['py310']

[tool.docformatter]
Expand All @@ -99,35 +90,35 @@ wrap-descriptions = 79
target-version = "py310"
#extend-include = ["*.ipynb"]
extend-exclude = ["test", "tutorials", "notebooks"]
line-length = 79 # PEP 8 standard for maximum line length
line-length = 79 # PEP 8 standard for maximum line length

[tool.ruff.format]
docstring-code-format = false

[tool.ruff.lint]
select = [
"F", # pyflakes errors
"E", # code style
"W", # warnings
"I", # import order
"UP", # pyupgrade rules
"B", # bugbear rules
"PIE", # pie rules
"Q", # quote rules
"RET", # return rules
"SIM", # code simplifications
"NPY", # numpy rules
"F", # pyflakes errors
"E", # code style
"W", # warnings
"I", # import order
"UP", # pyupgrade rules
"B", # bugbear rules
"PIE", # pie rules
"Q", # quote rules
"RET", # return rules
"SIM", # code simplifications
"NPY", # numpy rules
"PERF", # performance rules
]
fixable = ["ALL"]
ignore = [
"E501", # line too long
"RET504", # Unnecessary assignment before return
"RET505", # Unnecessary `elif` after `return` statement
"NPY002", # Replace legacy `np.random.seed` call with `np.random.Generator`
"UP038", # Use `X | Y` in `isinstance` call instead of `(X, Y)` -- not compatible with python 3.9 (even with __future__ import)
"W293", # Does not allow to have empty lines in multiline comments
"PERF203", # [TODO: fix all such issues] `try`-`except` within a loop incurs performance overhead
"E501", # line too long
"RET504", # Unnecessary assignment before return
"RET505", # Unnecessary `elif` after `return` statement
"NPY002", # Replace legacy `np.random.seed` call with `np.random.Generator`
"UP038", # Use `X | Y` in `isinstance` call instead of `(X, Y)` -- not compatible with python 3.9 (even with __future__ import)
"W293", # Does not allow to have empty lines in multiline comments
"PERF203", # [TODO: fix all such issues] `try`-`except` within a loop incurs performance overhead
]

[tool.ruff.lint.pydocstyle]
Expand All @@ -138,13 +129,10 @@ convention = "numpy"
"tests/*" = ["D"]

[tool.setuptools.dynamic]
version = {attr = "topobench.__version__"}
version = { attr = "topobench.__version__" }

[tool.setuptools.packages.find]
include = [
"topobench",
"topobench.*"
]
include = ["topobench", "topobench.*"]

[tool.mypy]
warn_redundant_casts = true
Expand All @@ -155,26 +143,18 @@ plugins = "numpy.typing.mypy_plugin"

[[tool.mypy.overrides]]
module = [
"torch_cluster.*","networkx.*","scipy.spatial","scipy.sparse","toponetx.classes.simplicial_complex"
"torch_cluster.*",
"networkx.*",
"scipy.spatial",
"scipy.sparse",
"toponetx.classes.simplicial_complex",
]
ignore_missing_imports = true

[tool.pytest.ini_options]
addopts = "--capture=no"
pythonpath = [
"."
]
pythonpath = ["."]

[tool.numpydoc_validation]
checks = [
"all",
"GL01",
"ES01",
"EX01",
"SA01"
]
exclude = [
'\.undocumented_method$',
'\.__init__$',
'\.__repr__$',
]
checks = ["all", "GL01", "ES01", "EX01", "SA01"]
exclude = ['\.undocumented_method$', '\.__init__$', '\.__repr__$']
2 changes: 1 addition & 1 deletion test/data/load/test_datasetloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def _gather_config_files(self, base_dir: Path) -> List[str]:
}

# Below the datasets that takes quite some time to load and process
self.long_running_datasets = {"mantra_name.yaml", "mantra_orientation.yaml", "mantra_genus.yaml", "mantra_betti_numbers.yaml"}
self.long_running_datasets = {"mantra_name.yaml", "mantra_orientation.yaml", "mantra_genus.yaml", "mantra_betti_numbers.yaml", "conjugated_pcqm4mv2.yaml"}


for dir_path in config_base_dir.iterdir():
Expand Down
8 changes: 5 additions & 3 deletions test/pipeline/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
from test._utils.simplified_pipeline import run


DATASET = "graph/MUTAG" # ADD YOUR DATASET HERE
MODELS = ["graph/gcn", "cell/topotune", "simplicial/topotune"] # ADD ONE OR SEVERAL MODELS OF YOUR CHOICE HERE
DATASET = "hypergraph/conjugated_ocelotv1"
MODELS = ["hypergraph/edgnn"]


class TestPipeline:
Expand All @@ -23,7 +23,7 @@ def test_pipeline(self):
config_name="run.yaml",
overrides=[
f"model={MODEL}",
f"dataset={DATASET}", # IF YOU IMPLEMENT A LARGE DATASET WITH AN OPTION TO USE A SLICE OF IT, ADD BELOW THE CORRESPONDING OPTION
f"dataset={DATASET}",
"trainer.max_epochs=2",
"trainer.min_epochs=1",
"trainer.check_val_every_n_epoch=1",
Expand All @@ -32,4 +32,6 @@ def test_pipeline(self):
],
return_hydra_config=True
)

print(cfg)
run(cfg)
Loading
Loading