-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path5_create_knowledge_base.py
More file actions
36 lines (25 loc) · 973 Bytes
/
5_create_knowledge_base.py
File metadata and controls
36 lines (25 loc) · 973 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
"""
Creates a knowledge base from the manually determined top 10 terms.
"""
import pathlib
import pickle
def main():
    """Build a term -> sentences knowledge base, pickle it, and print it.

    Terms are read one per line from
    ``data/manually_determined_top_10_terms.txt`` and sentences one per line
    from ``data/text/text_1_clean.txt`` through ``data/text/text_15_clean.txt``,
    all resolved against the current working directory.  Every sentence that
    contains a term as a (case-sensitive) substring is recorded as a fact for
    that term, in file order.  The resulting ``{term: [sentence, ...]}`` dict
    is pickled to ``knowledge_base.p`` in the current working directory and
    then printed, one term per line.

    Raises:
        FileNotFoundError: If the terms file or any of the text files is
            missing.
    """
    data_dir = pathlib.Path.cwd() / 'data'

    with open(data_dir / 'manually_determined_top_10_terms.txt', 'r', encoding='utf-8') as f:
        # Skip blank lines: an empty term is a substring of every sentence
        # and would collect the entire corpus as its "facts".
        important_terms = [term for term in (line.strip() for line in f) if term]

    # Read each text file exactly once instead of once per term; the file
    # contents do not depend on the term being searched.
    sentences = []
    for i in range(1, 16):
        with open(data_dir / 'text' / f'text_{i}_clean.txt', 'r', encoding='utf-8') as f:
            sentences.extend(f.read().splitlines())

    knowledge_base = {
        term: [sentence for sentence in sentences if term in sentence]
        for term in important_terms
    }

    # Use a context manager so the pickle file is flushed and closed
    # deterministically rather than whenever the handle is garbage-collected.
    with open('knowledge_base.p', 'wb') as f:
        pickle.dump(knowledge_base, f)

    # print the knowledge base
    for term in important_terms:
        print(term, ":", knowledge_base[term])
# Run the build only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()