From 63736f1b09fccb1d3fb2a9f162e2bc7751efe2f5 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Wed, 23 Dec 2020 12:48:13 +0100 Subject: [PATCH 1/6] Reorganize to src layout Also switch the encapsulation of code in main.py --- src/kgreasoning/__init__.py | 0 dataloader.py => src/kgreasoning/dataloader.py | 0 main.py => src/kgreasoning/main.py | 9 +++++++-- models.py => src/kgreasoning/models.py | 0 util.py => src/kgreasoning/util.py | 0 5 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 src/kgreasoning/__init__.py rename dataloader.py => src/kgreasoning/dataloader.py (100%) rename main.py => src/kgreasoning/main.py (97%) rename models.py => src/kgreasoning/models.py (100%) rename util.py => src/kgreasoning/util.py (100%) diff --git a/src/kgreasoning/__init__.py b/src/kgreasoning/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dataloader.py b/src/kgreasoning/dataloader.py similarity index 100% rename from dataloader.py rename to src/kgreasoning/dataloader.py diff --git a/main.py b/src/kgreasoning/main.py similarity index 97% rename from main.py rename to src/kgreasoning/main.py index 6b76007..cd10034 100755 --- a/main.py +++ b/src/kgreasoning/main.py @@ -193,7 +193,7 @@ def load_data(args, tasks): return train_queries, train_answers, valid_queries, valid_hard_answers, valid_easy_answers, test_queries, test_hard_answers, test_easy_answers -def main(args): +def main_helper(args): set_global_seed(args.seed) tasks = args.tasks.split('.') for task in tasks: @@ -445,5 +445,10 @@ def main(args): logging.info("Training finished!!") + +def main(): + main_helper(parse_args()) + + if __name__ == '__main__': - main(parse_args()) \ No newline at end of file + main() diff --git a/models.py b/src/kgreasoning/models.py similarity index 100% rename from models.py rename to src/kgreasoning/models.py diff --git a/util.py b/src/kgreasoning/util.py similarity index 100% rename from util.py rename to src/kgreasoning/util.py From 
e2e0cf5be2dd4cb88d861b9f90f0a69caeb38634 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Wed, 23 Dec 2020 12:52:58 +0100 Subject: [PATCH 2/6] Add setup configuration and update readme --- README.md | 16 ++++++++++++++++ setup.cfg | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ setup.py | 8 ++++++++ 3 files changed, 74 insertions(+) create mode 100644 setup.cfg create mode 100644 setup.py diff --git a/README.md b/README.md index ee1a5e2..c35856a 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,22 @@ This repo contains several algorithms for multi-hop reasoning on knowledge graph - [x] [Query2box](https://arxiv.org/abs/2002.05969) - [x] [GQE](https://arxiv.org/abs/1806.01445) +## Installation + +Install directly from GitHub with: + +```shell +$ pip install git+https://github.com/snap-stanford/KGReasoning.git +``` + +Install in development mode with: + +```shell +$ git clone https://github.com/snap-stanford/KGReasoning.git +$ cd KGReasoning +$ pip install -e . +``` + **KG Data** The KG data (FB15k, FB15k-237, NELL995) mentioned in the BetaE paper and the Query2box paper can be downloaded [here](http://snap.stanford.edu/betae/KG_data.zip). Note the two use the same training queries, but the difference is that the valid/test queries in BetaE paper have a maximum number of answers, making it more realistic. 
diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..20f1429 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,50 @@ +########################## +# Setup.py Configuration # +########################## +[metadata] +name = kgreasoning +version = 0.0.1-dev + +# URLs associated with the project +url = https://github.com/snap-stanford/KGReasoning +download_url = https://github.com/snap-stanford/KGReasoning/releases +project_urls = + Bug Tracker = https://github.com/snap-stanford/KGReasoning/issues + Source Code = https://github.com/snap-stanford/KGReasoning + +# Author information +author = Hongyu Ren +author_email = hyren@cs.stanford.edu + +# License Information +license = MIT +license_file = LICENSE + +# Search tags +classifiers = + Development Status :: 1 - Planning + License :: OSI Approved :: MIT License + Operating System :: OS Independent + Programming Language :: Python + +[options] +install_requires = + torch + tqdm + +# Random options +zip_safe = false +include_package_data = True +python_requires = >=3.6 + +# Where is my code +packages = find: +package_dir = + = src + +[options.packages.find] +where = src + +[options.entry_points] +console_scripts = + kgreasoning = kgreasoning.main:main diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..a78fbfd --- /dev/null +++ b/setup.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- + +"""Setup module.""" + +import setuptools + +if __name__ == '__main__': + setuptools.setup() From 3aadd3f8a999a1b9c04a16f80f6932e0cd92103b Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Wed, 23 Dec 2020 12:53:56 +0100 Subject: [PATCH 3/6] Use vanity CLI --- README.md | 12 ++++++++++++ example.sh | 20 ++++++++++---------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index c35856a..6f7aeb4 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,18 @@ $ cd KGReasoning $ pip install -e . 
``` +## Command Line Interface + +The `kgreasoning` command line tool is automatically installed. It can +be used as follows: + +```bash +$ CUDA_VISIBLE_DEVICES=0 kgreasoning --cuda --do_train --do_valid --do_test \ + --data_path data/FB15k-237-betae -n 128 -b 512 -d 800 -g 24 \ + -lr 0.0001 --max_steps 450001 --cpu_num 1 --geo vec --valid_steps 15000 \ + --tasks "1p.2p.3p.2i.3i.ip.pi.2u.up" +``` + **KG Data** The KG data (FB15k, FB15k-237, NELL995) mentioned in the BetaE paper and the Query2box paper can be downloaded [here](http://snap.stanford.edu/betae/KG_data.zip). Note the two use the same training queries, but the difference is that the valid/test queries in BetaE paper have a maximum number of answers, making it more realistic. diff --git a/example.sh b/example.sh index 0f5b816..7cbe6a7 100755 --- a/example.sh +++ b/example.sh @@ -1,50 +1,50 @@ # FB15k-237 -CUDA_VISIBLE_DEVICES=0 python main.py --cuda --do_train --do_valid --do_test \ +CUDA_VISIBLE_DEVICES=0 kgreasoning --cuda --do_train --do_valid --do_test \ --data_path data/FB15k-237-betae -n 128 -b 512 -d 800 -g 24 \ -lr 0.0001 --max_steps 450001 --cpu_num 1 --geo vec --valid_steps 15000 \ --tasks "1p.2p.3p.2i.3i.ip.pi.2u.up" -CUDA_VISIBLE_DEVICES=0 python main.py --cuda --do_train --do_valid --do_test \ +CUDA_VISIBLE_DEVICES=0 kgreasoning --cuda --do_train --do_valid --do_test \ --data_path data/FB15k-237-betae -n 128 -b 512 -d 400 -g 60 \ -lr 0.0001 --max_steps 450001 --cpu_num 1 --geo beta --valid_steps 15000 \ -betam "(1600,2)" -CUDA_VISIBLE_DEVICES=0 python main.py --cuda --do_train --do_valid --do_test \ +CUDA_VISIBLE_DEVICES=0 kgreasoning --cuda --do_train --do_valid --do_test \ --data_path data/FB15k-237-betae -n 128 -b 512 -d 400 -g 24 \ -lr 0.0001 --max_steps 450001 --cpu_num 1 --geo box --valid_steps 15000 \ -boxm "(none,0.02)" --tasks "1p.2p.3p.2i.3i.ip.pi.2u.up" # FB15k -CUDA_VISIBLE_DEVICES=0 python main.py --cuda --do_train --do_valid --do_test \ +CUDA_VISIBLE_DEVICES=0 
kgreasoning --cuda --do_train --do_valid --do_test \ --data_path data/FB15k-betae -n 128 -b 512 -d 800 -g 24 \ -lr 0.0001 --max_steps 450001 --cpu_num 1 --geo vec --valid_steps 15000 \ --tasks "1p.2p.3p.2i.3i.ip.pi.2u.up" -CUDA_VISIBLE_DEVICES=0 python main.py --cuda --do_train --do_valid --do_test \ +CUDA_VISIBLE_DEVICES=0 kgreasoning --cuda --do_train --do_valid --do_test \ --data_path data/FB15k-betae -n 128 -b 512 -d 400 -g 60 \ -lr 0.0001 --max_steps 450001 --cpu_num 1 --geo beta --valid_steps 15000 \ -betam "(1600,2)" -CUDA_VISIBLE_DEVICES=0 python main.py --cuda --do_train --do_valid --do_test \ +CUDA_VISIBLE_DEVICES=0 kgreasoning --cuda --do_train --do_valid --do_test \ --data_path data/FB15k-betae -n 128 -b 512 -d 400 -g 24 \ -lr 0.0001 --max_steps 450001 --cpu_num 1 --geo box --valid_steps 15000 \ -boxm "(none,0.02)" --tasks "1p.2p.3p.2i.3i.ip.pi.2u.up" # NELL -CUDA_VISIBLE_DEVICES=0 python main.py --cuda --do_train --do_valid --do_test \ +CUDA_VISIBLE_DEVICES=0 kgreasoning --cuda --do_train --do_valid --do_test \ --data_path data/NELL-betae -n 128 -b 512 -d 800 -g 24 \ -lr 0.0001 --max_steps 450001 --cpu_num 1 --geo vec --valid_steps 15000 \ --tasks "1p.2p.3p.2i.3i.ip.pi.2u.up" -CUDA_VISIBLE_DEVICES=0 python main.py --cuda --do_train --do_valid --do_test \ +CUDA_VISIBLE_DEVICES=0 kgreasoning --cuda --do_train --do_valid --do_test \ --data_path data/NELL-betae -n 128 -b 512 -d 400 -g 60 \ -lr 0.0001 --max_steps 450001 --cpu_num 1 --geo beta --valid_steps 15000 \ -betam "(1600,2)" -CUDA_VISIBLE_DEVICES=0 python main.py --cuda --do_train --do_valid --do_test \ +CUDA_VISIBLE_DEVICES=0 kgreasoning --cuda --do_train --do_valid --do_test \ --data_path data/NELL-betae -n 128 -b 512 -d 400 -g 24 \ -lr 0.0001 --max_steps 450001 --cpu_num 1 --geo box --valid_steps 15000 \ -boxm "(none,0.02)" --tasks "1p.2p.3p.2i.3i.ip.pi.2u.up" @@ -52,7 +52,7 @@ CUDA_VISIBLE_DEVICES=0 python main.py --cuda --do_train --do_valid --do_test \ ## Evaluation -CUDA_VISIBLE_DEVICES=0 
python main.py --cuda --do_test \ +CUDA_VISIBLE_DEVICES=0 kgreasoning --cuda --do_test \ --data_path data/FB15k-237-betae -n 128 -b 512 -d 400 -g 24 \ -lr 0.0001 --max_steps 450001 --cpu_num 1 --geo box --valid_steps 15000 \ -boxm "(none,0.02)" --tasks "1p.2p.3p.2i.3i.ip.pi.2u.up" --checkpoint_path $CKPT_PATH From e5f76f5f8db15aae3f6ca5277a407df43af85df3 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Wed, 23 Dec 2020 12:57:07 +0100 Subject: [PATCH 4/6] Add missing requirement --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index 20f1429..26c9bf6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -31,6 +31,7 @@ classifiers = install_requires = torch tqdm + tensorboardX # Random options zip_safe = false From b7cc7154b41ee6c97ef13b797243133d7b874483 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Wed, 23 Dec 2020 12:58:08 +0100 Subject: [PATCH 5/6] Update imports --- src/kgreasoning/main.py | 6 +++--- src/kgreasoning/models.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/kgreasoning/main.py b/src/kgreasoning/main.py index cd10034..ad83cd0 100755 --- a/src/kgreasoning/main.py +++ b/src/kgreasoning/main.py @@ -13,14 +13,14 @@ import numpy as np import torch from torch.utils.data import DataLoader -from models import KGReasoning -from dataloader import TestDataset, TrainDataset, SingledirectionalOneShotIterator +from kgreasoning.models import KGReasoning +from kgreasoning.dataloader import TestDataset, TrainDataset, SingledirectionalOneShotIterator from tensorboardX import SummaryWriter import time import pickle from collections import defaultdict from tqdm import tqdm -from util import flatten_query, list2tuple, parse_time, set_global_seed, eval_tuple +from kgreasoning.util import flatten_query, list2tuple, parse_time, set_global_seed, eval_tuple query_name_dict = {('e',('r',)): '1p', ('e', ('r', 'r')): '2p', diff --git a/src/kgreasoning/models.py b/src/kgreasoning/models.py index 
b761009..524b3b3 100755 --- a/src/kgreasoning/models.py +++ b/src/kgreasoning/models.py @@ -10,7 +10,7 @@ import torch.nn as nn import torch.nn.functional as F from torch.utils.data import DataLoader -from dataloader import TestDataset, TrainDataset, SingledirectionalOneShotIterator +from kgreasoning.dataloader import TestDataset, TrainDataset, SingledirectionalOneShotIterator import random import pickle import math From 249ae9a5276311ded028f6f13aac0b2bdf7fca2c Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Wed, 23 Dec 2020 13:02:54 +0100 Subject: [PATCH 6/6] Update dataloader.py --- src/kgreasoning/dataloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kgreasoning/dataloader.py b/src/kgreasoning/dataloader.py index 5f4f953..9c3230e 100755 --- a/src/kgreasoning/dataloader.py +++ b/src/kgreasoning/dataloader.py @@ -8,7 +8,7 @@ import torch from torch.utils.data import Dataset -from util import list2tuple, tuple2list, flatten +from kgreasoning.util import list2tuple, tuple2list, flatten class TestDataset(Dataset): def __init__(self, queries, nentity, nrelation):