-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathadd_wikidata_description.py
More file actions
47 lines (38 loc) · 1.44 KB
/
add_wikidata_description.py
File metadata and controls
47 lines (38 loc) · 1.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/env python
# coding: utf-8
# from qwikidata.entity import WikidataItem, WikidataLexeme, WikidataProperty
from qwikidata.linked_data_interface import get_entity_dict_from_api, LdiResponseNotOk
from tqdm import tqdm
from termcolor import colored
# from nltk.util import ngrams
import requests_cache
import pickle
import numpy as np
import argparse
# Persist every HTTP response in a local SQLite cache so repeated runs do not
# hit the Wikidata API again; expire_after=-1 keeps cached entries forever.
requests_cache.install_cache(
    'data/cache/wikidata_descriptions',
    backend='sqlite',
    expire_after=-1,
)

# Register the progress_apply method on pandas objects (used further down).
tqdm.pandas()

# Both positional arguments are optional (nargs='?') and fall back to the
# default pickle locations when omitted.
parser = argparse.ArgumentParser()
parser.add_argument('mentions_pickle', nargs='?', default="pickles/unambiguous_mention.pickle")
parser.add_argument('output_pickle', nargs='?', default="pickles/described_unambiguous_mention.pickle")
args = parser.parse_args()

# Load the mentions table to annotate (presumably a pandas DataFrame, since
# it is later processed row by row with progress_apply).
with open(args.mentions_pickle, "rb") as mentions_file:
    mentions = pickle.load(mentions_file)
def add_wikidata_description(mention):
    """Fetch the English Wikidata description for one mention row.

    Args:
        mention: One row of the mentions table. It must provide a
            ``"wikidata_title"`` entry, which is passed to the Wikidata API
            as the entity identifier, and a ``name`` attribute, which is
            printed to identify the row when the lookup fails.

    Returns:
        The English description string, or ``np.nan`` when the API request
        fails or the entity has no English description.
    """
    try:
        wikidata_dict = get_entity_dict_from_api(mention["wikidata_title"])
    except LdiResponseNotOk as err:
        # Best effort: report the failing row in red and mark it as missing
        # instead of aborting the whole run.
        print(colored(mention.name, color='red'))
        print(colored(err, color='red'))
        return np.nan

    descriptions = wikidata_dict["descriptions"]
    if "en" not in descriptions:
        # Use the same missing-value marker as the error path rather than
        # falling off the end of the function and returning None.
        return np.nan
    return descriptions["en"]["value"]
# Look up a description for each row (axis='columns' hands the function one
# row at a time) while showing a progress bar, then store the results as a
# new column.
wikidata_descriptions = mentions.progress_apply(add_wikidata_description, axis='columns')
mentions["wikidata_description"] = wikidata_descriptions
print(mentions)

# Write the annotated table to the output pickle using the newest protocol.
with open(args.output_pickle, "wb") as output_file:
    pickle.dump(mentions, output_file, protocol=pickle.HIGHEST_PROTOCOL)