From 5c814e09e44778157f09a58e8e848b7452130c70 Mon Sep 17 00:00:00 2001 From: Andrea Bacciu Date: Tue, 17 May 2022 23:05:05 +0200 Subject: [PATCH 1/3] Fix missing requirements in dataset-construction --- dataset-construction/requirements.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dataset-construction/requirements.txt b/dataset-construction/requirements.txt index f0d23e4..cd44e01 100644 --- a/dataset-construction/requirements.txt +++ b/dataset-construction/requirements.txt @@ -1,3 +1,5 @@ tqdm pymongo -numpy \ No newline at end of file +numpy +nltk==3.7 +strsim==0.0.3 From 7b96cbe301749e6fa469a531dd04f7bd2a712c0c Mon Sep 17 00:00:00 2001 From: Andrea Bacciu Date: Sun, 22 May 2022 14:49:10 +0200 Subject: [PATCH 2/3] Removed a missing dependency --- .../src/ndb_data/construction/make_database_initial.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/dataset-construction/src/ndb_data/construction/make_database_initial.py b/dataset-construction/src/ndb_data/construction/make_database_initial.py index d24385e..23c71f2 100644 --- a/dataset-construction/src/ndb_data/construction/make_database_initial.py +++ b/dataset-construction/src/ndb_data/construction/make_database_initial.py @@ -31,8 +31,6 @@ from similarity.normalized_levenshtein import NormalizedLevenshtein from tqdm import tqdm -from ndb_data.util.log_helper import setup_logging - detok = TreebankWordDetokenizer() logger = logging.getLogger(__name__) @@ -71,7 +69,6 @@ def normalize_subject(subject_name, fact): if __name__ == "__main__": - setup_logging() parser = ArgumentParser() parser.add_argument("cache_dir") parser.add_argument("out_file") From 18ae3ac99af60ba8769e8930c2ad40270bdc76e3 Mon Sep 17 00:00:00 2001 From: Andrea Bacciu Date: Thu, 26 May 2022 11:25:23 +0200 Subject: [PATCH 3/3] Create setup.sh in dataset-construction --- dataset-construction/setup.sh | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 dataset-construction/setup.sh diff --git a/dataset-construction/setup.sh b/dataset-construction/setup.sh new file mode 100644 index 0000000..0738087 --- /dev/null +++ b/dataset-construction/setup.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +pip3 install -r requirements.txt +python3 -m nltk.downloader punkt