diff --git a/README.md b/README.md index ee1a5e2..6f7aeb4 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,34 @@ This repo contains several algorithms for multi-hop reasoning on knowledge graph - [x] [Query2box](https://arxiv.org/abs/2002.05969) - [x] [GQE](https://arxiv.org/abs/1806.01445) +## Installation + +Install directly from GitHub with: + +```shell +$ pip install git+https://github.com/snap-stanford/KGReasoning.git +``` + +Install in development mode with: + +```shell +$ git clone https://github.com/snap-stanford/KGReasoning.git +$ cd KGReasoning +$ pip install -e . +``` + +## Command Line Interface + +The `kgreasoning` command line tool is automatically installed. It can +be used as follows: + +```bash +$ CUDA_VISIBLE_DEVICES=0 kgreasoning --cuda --do_train --do_valid --do_test \ + --data_path data/FB15k-237-betae -n 128 -b 512 -d 800 -g 24 \ + -lr 0.0001 --max_steps 450001 --cpu_num 1 --geo vec --valid_steps 15000 \ + --tasks "1p.2p.3p.2i.3i.ip.pi.2u.up" +``` + **KG Data** The KG data (FB15k, FB15k-237, NELL995) mentioned in the BetaE paper and the Query2box paper can be downloaded [here](http://snap.stanford.edu/betae/KG_data.zip). Note the two use the same training queries, but the difference is that the valid/test queries in BetaE paper have a maximum number of answers, making it more realistic. 
diff --git a/example.sh b/example.sh index 0f5b816..7cbe6a7 100755 --- a/example.sh +++ b/example.sh @@ -1,50 +1,50 @@ # FB15k-237 -CUDA_VISIBLE_DEVICES=0 python main.py --cuda --do_train --do_valid --do_test \ +CUDA_VISIBLE_DEVICES=0 kgreasoning --cuda --do_train --do_valid --do_test \ --data_path data/FB15k-237-betae -n 128 -b 512 -d 800 -g 24 \ -lr 0.0001 --max_steps 450001 --cpu_num 1 --geo vec --valid_steps 15000 \ --tasks "1p.2p.3p.2i.3i.ip.pi.2u.up" -CUDA_VISIBLE_DEVICES=0 python main.py --cuda --do_train --do_valid --do_test \ +CUDA_VISIBLE_DEVICES=0 kgreasoning --cuda --do_train --do_valid --do_test \ --data_path data/FB15k-237-betae -n 128 -b 512 -d 400 -g 60 \ -lr 0.0001 --max_steps 450001 --cpu_num 1 --geo beta --valid_steps 15000 \ -betam "(1600,2)" -CUDA_VISIBLE_DEVICES=0 python main.py --cuda --do_train --do_valid --do_test \ +CUDA_VISIBLE_DEVICES=0 kgreasoning --cuda --do_train --do_valid --do_test \ --data_path data/FB15k-237-betae -n 128 -b 512 -d 400 -g 24 \ -lr 0.0001 --max_steps 450001 --cpu_num 1 --geo box --valid_steps 15000 \ -boxm "(none,0.02)" --tasks "1p.2p.3p.2i.3i.ip.pi.2u.up" # FB15k -CUDA_VISIBLE_DEVICES=0 python main.py --cuda --do_train --do_valid --do_test \ +CUDA_VISIBLE_DEVICES=0 kgreasoning --cuda --do_train --do_valid --do_test \ --data_path data/FB15k-betae -n 128 -b 512 -d 800 -g 24 \ -lr 0.0001 --max_steps 450001 --cpu_num 1 --geo vec --valid_steps 15000 \ --tasks "1p.2p.3p.2i.3i.ip.pi.2u.up" -CUDA_VISIBLE_DEVICES=0 python main.py --cuda --do_train --do_valid --do_test \ +CUDA_VISIBLE_DEVICES=0 kgreasoning --cuda --do_train --do_valid --do_test \ --data_path data/FB15k-betae -n 128 -b 512 -d 400 -g 60 \ -lr 0.0001 --max_steps 450001 --cpu_num 1 --geo beta --valid_steps 15000 \ -betam "(1600,2)" -CUDA_VISIBLE_DEVICES=0 python main.py --cuda --do_train --do_valid --do_test \ +CUDA_VISIBLE_DEVICES=0 kgreasoning --cuda --do_train --do_valid --do_test \ --data_path data/FB15k-betae -n 128 -b 512 -d 400 -g 24 \ -lr 0.0001 
--max_steps 450001 --cpu_num 1 --geo box --valid_steps 15000 \ -boxm "(none,0.02)" --tasks "1p.2p.3p.2i.3i.ip.pi.2u.up" # NELL -CUDA_VISIBLE_DEVICES=0 python main.py --cuda --do_train --do_valid --do_test \ +CUDA_VISIBLE_DEVICES=0 kgreasoning --cuda --do_train --do_valid --do_test \ --data_path data/NELL-betae -n 128 -b 512 -d 800 -g 24 \ -lr 0.0001 --max_steps 450001 --cpu_num 1 --geo vec --valid_steps 15000 \ --tasks "1p.2p.3p.2i.3i.ip.pi.2u.up" -CUDA_VISIBLE_DEVICES=0 python main.py --cuda --do_train --do_valid --do_test \ +CUDA_VISIBLE_DEVICES=0 kgreasoning --cuda --do_train --do_valid --do_test \ --data_path data/NELL-betae -n 128 -b 512 -d 400 -g 60 \ -lr 0.0001 --max_steps 450001 --cpu_num 1 --geo beta --valid_steps 15000 \ -betam "(1600,2)" -CUDA_VISIBLE_DEVICES=0 python main.py --cuda --do_train --do_valid --do_test \ +CUDA_VISIBLE_DEVICES=0 kgreasoning --cuda --do_train --do_valid --do_test \ --data_path data/NELL-betae -n 128 -b 512 -d 400 -g 24 \ -lr 0.0001 --max_steps 450001 --cpu_num 1 --geo box --valid_steps 15000 \ -boxm "(none,0.02)" --tasks "1p.2p.3p.2i.3i.ip.pi.2u.up" @@ -52,7 +52,7 @@ CUDA_VISIBLE_DEVICES=0 python main.py --cuda --do_train --do_valid --do_test \ ## Evaluation -CUDA_VISIBLE_DEVICES=0 python main.py --cuda --do_test \ +CUDA_VISIBLE_DEVICES=0 kgreasoning --cuda --do_test \ --data_path data/FB15k-237-betae -n 128 -b 512 -d 400 -g 24 \ -lr 0.0001 --max_steps 450001 --cpu_num 1 --geo box --valid_steps 15000 \ -boxm "(none,0.02)" --tasks "1p.2p.3p.2i.3i.ip.pi.2u.up" --checkpoint_path $CKPT_PATH diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..26c9bf6 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,51 @@ +########################## +# Setup.py Configuration # +########################## +[metadata] +name = kgreasoning +version = 0.0.1-dev + +# URLs associated with the project +url = https://github.com/snap-stanford/KGReasoning +download_url = https://github.com/snap-stanford/KGReasoning/releases +project_urls = + Bug 
Tracker = https://github.com/snap-stanford/KGReasoning/issues + Source Code = https://github.com/snap-stanford/KGReasoning + +# Author information +author = Hongyu Ren +author_email = hyren@cs.stanford.edu + +# License Information +license = MIT +license_file = LICENSE + +# Search tags +classifiers = + Development Status :: 1 - Planning + License :: OSI Approved :: MIT License + Operating System :: OS Independent + Programming Language :: Python + +[options] +install_requires = + torch + tqdm + tensorboardX + +# Miscellaneous options +zip_safe = false +include_package_data = True +python_requires = >=3.6 + +# Package discovery (src layout) +packages = find: +package_dir = + = src + +[options.packages.find] +where = src + +[options.entry_points] +console_scripts = + kgreasoning = kgreasoning.main:main diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..a78fbfd --- /dev/null +++ b/setup.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- + +"""Setup module.""" + +import setuptools + +if __name__ == '__main__': + setuptools.setup() diff --git a/src/kgreasoning/__init__.py b/src/kgreasoning/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dataloader.py b/src/kgreasoning/dataloader.py similarity index 95% rename from dataloader.py rename to src/kgreasoning/dataloader.py index 5f4f953..9c3230e 100755 --- a/dataloader.py +++ b/src/kgreasoning/dataloader.py @@ -8,7 +8,7 @@ import torch from torch.utils.data import Dataset -from util import list2tuple, tuple2list, flatten +from kgreasoning.util import list2tuple, tuple2list, flatten class TestDataset(Dataset): def __init__(self, queries, nentity, nrelation): diff --git a/main.py b/src/kgreasoning/main.py similarity index 96% rename from main.py rename to src/kgreasoning/main.py index 6b76007..ad83cd0 100755 --- a/main.py +++ b/src/kgreasoning/main.py @@ -13,14 +13,14 @@ import numpy as np import torch from torch.utils.data import DataLoader -from models import KGReasoning -from dataloader import TestDataset, 
TrainDataset, SingledirectionalOneShotIterator +from kgreasoning.models import KGReasoning +from kgreasoning.dataloader import TestDataset, TrainDataset, SingledirectionalOneShotIterator from tensorboardX import SummaryWriter import time import pickle from collections import defaultdict from tqdm import tqdm -from util import flatten_query, list2tuple, parse_time, set_global_seed, eval_tuple +from kgreasoning.util import flatten_query, list2tuple, parse_time, set_global_seed, eval_tuple query_name_dict = {('e',('r',)): '1p', ('e', ('r', 'r')): '2p', @@ -193,7 +193,7 @@ def load_data(args, tasks): return train_queries, train_answers, valid_queries, valid_hard_answers, valid_easy_answers, test_queries, test_hard_answers, test_easy_answers -def main(args): +def main_helper(args): set_global_seed(args.seed) tasks = args.tasks.split('.') for task in tasks: @@ -445,5 +445,10 @@ def main(args): logging.info("Training finished!!") + +def main(): + main_helper(parse_args()) + + if __name__ == '__main__': - main(parse_args()) \ No newline at end of file + main() diff --git a/models.py b/src/kgreasoning/models.py similarity index 97% rename from models.py rename to src/kgreasoning/models.py index b761009..524b3b3 100755 --- a/models.py +++ b/src/kgreasoning/models.py @@ -10,7 +10,7 @@ import torch.nn as nn import torch.nn.functional as F from torch.utils.data import DataLoader -from dataloader import TestDataset, TrainDataset, SingledirectionalOneShotIterator +from kgreasoning.dataloader import TestDataset, TrainDataset, SingledirectionalOneShotIterator import random import pickle import math diff --git a/util.py b/src/kgreasoning/util.py similarity index 100% rename from util.py rename to src/kgreasoning/util.py