diff --git a/.gitignore b/.gitignore index e5819bb..b613cc7 100644 --- a/.gitignore +++ b/.gitignore @@ -121,3 +121,6 @@ cython_debug/ # MacOS .DS_Store + +# Datasets +data/datasets/*.zst diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 0000000..73d56b1 --- /dev/null +++ b/docs/contributing.md @@ -0,0 +1,3 @@ +# Contributing to Hyperbench + +Thank you for your interest in contributing to Hyperbench! diff --git a/hyperbench/data/dataset.py b/hyperbench/data/dataset.py index 70c3a1d..d04c716 100644 --- a/hyperbench/data/dataset.py +++ b/hyperbench/data/dataset.py @@ -23,11 +23,14 @@ class DatasetNames(Enum): AMAZON = "amazon" CONTACT_HIGH_SCHOOL = "contact-high-school" CONTACT_PRIMARY_SCHOOL = "contact-primary-school" + CORA = "cora" + COURSERA = "coursera" DBLP = "dblp" EMAIL_ENRON = "email-Enron" EMAIL_W3C = "email-W3C" GEOMETRY = "geometry" GOT = "got" + IMBD = "imdb" MUSIC_BLUES_REVIEWS = "music-blues-reviews" NBA = "nba" NDC_CLASSES = "NDC-classes" @@ -37,6 +40,7 @@ class DatasetNames(Enum): THREADS_MATH_SX = "threads-math-sx" TWITTER = "twitter" VEGAS_BARS_REVIEWS = "vegas-bars-reviews" + PATENT = "patent" class HIFConverter: @@ -417,3 +421,19 @@ class DBLPDataset(Dataset): class ThreadsMathsxDataset(Dataset): DATASET_NAME = "THREADSMATHSX" + + +class PatentDataset(Dataset): + DATASET_NAME = "PATENT" + + +class CourseraDataset(Dataset): + DATASET_NAME = "COURSERA" + + +class IMDBDataset(Dataset): + DATASET_NAME = "IMDB" + + +class CoraDataset(Dataset): + DATASET_NAME = "CORA" diff --git a/hyperbench/data/datasets/.gitkeep b/hyperbench/data/datasets/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/hyperbench/data/datasets/algebra.json.zst b/hyperbench/data/datasets/algebra.json.zst deleted file mode 100644 index afb69c4..0000000 Binary files a/hyperbench/data/datasets/algebra.json.zst and /dev/null differ diff --git a/hyperbench/data/datasets/dblp.json.zst b/hyperbench/data/datasets/dblp.json.zst deleted file mode 100644 index 5e4e896..0000000 Binary files a/hyperbench/data/datasets/dblp.json.zst and /dev/null differ diff --git a/hyperbench/tests/train/trainer_test.py b/hyperbench/tests/train/trainer_test.py index c82f121..0ec694c 100644 --- a/hyperbench/tests/train/trainer_test.py +++ b/hyperbench/tests/train/trainer_test.py @@ -20,8 +20,8 @@ def mock_model_configs(): model_config.version = f"{i}" model_config.model = model model_config.trainer = None - model_config.full_model_name = ( - lambda self=model_config: f"{self.name}:{self.version}" + model_config.full_model_name = lambda self=model_config: ( + f"{self.name}:{self.version}" ) model_configs.append(model_config)