-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextract_TeachStud.py
More file actions
144 lines (117 loc) · 3.88 KB
/
extract_TeachStud.py
File metadata and controls
144 lines (117 loc) · 3.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# Code to extract Teacher and Student associated with each person
import rdflib
import os
from rdflib.namespace import RDF, RDFS, SKOS, OWL, Namespace, NamespaceManager, XSD, URIRef
import csv
import pyewts
import sys
# Namespaces referenced by the extraction code and the prefix table below.
BDR = Namespace("http://purl.bdrc.io/resource/")
BDO = Namespace("http://purl.bdrc.io/ontology/core/")
BDA = Namespace("http://purl.bdrc.io/admindata/")
BDG = Namespace("http://purl.bdrc.io/graph/")
ADM = Namespace("http://purl.bdrc.io/ontology/admin/")
WD = Namespace("http://www.wikidata.org/entity/")
WDT = Namespace("http://www.wikidata.org/prop/direct/")
DILA = Namespace("http://purl.dila.edu.tw/resource/")
VIAF = Namespace("http://viaf.org/viaf/")

# Shared namespace manager; extract() uses it to split URIs into qnames.
# Prefixes are bound in the order listed.
NSM = NamespaceManager(rdflib.Graph())
for _prefix, _namespace in (
    ("bdr", BDR),
    ("bdg", BDG),
    ("bdo", BDO),
    ("bda", BDA),
    ("adm", ADM),
    ("skos", SKOS),
    ("rdfs", RDFS),
    ("wd", WD),
    ("owl", OWL),
    ("wdt", WDT),
    ("dila", DILA),
    ("viaf", VIAF),
):
    NSM.bind(_prefix, _namespace)
# Do not leave the loop variables behind as module attributes.
del _prefix, _namespace

# see https://github.com/RDFLib/rdflib/issues/806
# On rdflib 4.2.2 only, remove the xsd:gYear entry from rdflib's
# literal-to-Python conversion table.
if rdflib.__version__ == "4.2.2":
    x = rdflib.term._toPythonMapping.pop(rdflib.XSD["gYear"])

# pyewts converter instance; none of the functions in this file section use it.
converter = pyewts.pyewts()
# Function to extract values for teachers and students
def extract(g, id):
    """Collect the teacher and student IDs recorded for one person.

    Args:
        g: Graph to query; it must provide ``triples((s, p, o))``.
        id: Name of the person inside the ``BDR`` namespace.

    Returns:
        A dict that may contain ``"teachers"`` (objects of
        ``BDO.personStudentOf``) and ``"students"`` (objects of
        ``BDO.personTeacherOf``), each mapped to a list of IDs. A key is
        absent when the graph has no matching triple for it.
    """
    person = BDR[id]
    relations = (
        ("teachers", BDO.personStudentOf),
        ("students", BDO.personTeacherOf),
    )
    values = {}
    for key, predicate in relations:
        for _, _, target in g.triples((person, predicate, None)):
            # Only the last element of the computed qname is kept as the ID.
            _, _, short_id = NSM.compute_qname_strict(target)
            values.setdefault(key, []).append(short_id)
    return values
# Creates list with the extracted data
def createList(personID, vals, COUNTPROP):
    """Build one fixed-width row for a person.

    The row starts with ``personID`` and is followed, for every entry of
    ``COUNTPROP`` in order, by exactly ``nbcols`` cells: the values found for
    that key, padded on the right with empty strings. Every row therefore has
    the same length and each relation always lands in the same columns, even
    when a person has no value for one of the relations.

    Args:
        personID: Identifier placed in the first cell.
        vals: Mapping from relation name to the list of values found for it;
            relations with no values may be missing.
        COUNTPROP: Mapping from relation name to the number of columns
            reserved for it.

    Returns:
        The row as a list of ``1 + sum(COUNTPROP.values())`` cells.
    """
    row = [personID]
    for tp, nbcols in COUNTPROP.items():
        found = vals.get(tp, [])
        if nbcols < len(found):
            # More values than reserved columns: report it and write none of
            # them, but still emit the blank cells so later relations do not
            # shift into this relation's columns.
            print("!!Error!! There should be at least %i columns for %s aliases" % (len(found), tp))
            print(personID)
            found = []
        row.extend(found)
        row.extend([""] * (nbcols - len(found)))
    return row
# Wrapper that runs the extraction steps for a single person file
def run(file_path, id, entity_list):
    """Parse one TriG file and append that person's row to ``entity_list``.

    Args:
        file_path: Path of the file to parse with ``format="trig"``.
        id: Person identifier handed to ``extract`` and ``createList``.
        entity_list: List that receives the new row; it is mutated in place.
    """
    # Number of columns reserved per relation in the output row.
    column_counts = {
        "teachers": 40,  # Maximum number of teachers associated with a Person
        "students": 80,  # Maximum number of students associated with a Person
    }
    graph = rdflib.ConjunctiveGraph()
    graph.parse(file_path, format="trig")
    entity_list.append(createList(id, extract(graph, id), column_counts))
# Function to create CSV using master list
def createCSV(all_list):
    """Append every row of ``all_list`` to ``ExtractProp1.csv``.

    The file is opened in append mode in the current working directory, so
    rows written by earlier calls are kept. No header line is written.

    Args:
        all_list: Iterable of rows, each row an iterable of cell values.
    """
    # newline="" hands line-ending control to the csv writer, as the csv
    # module requires; without it, platforms that translate newlines end up
    # with doubled carriage returns and blank rows between records.
    with open('ExtractProp1.csv', "a", newline="") as f:
        csv.writer(f).writerows(all_list)
def main():
    """Process every person file under ``persons/`` and write the CSV.

    Each entry of ``persons`` whose name does not contain ".git" is listed
    as a directory. Every file inside is passed to ``run`` unless its derived
    ID contains "P0RK". The collected rows are then handed to ``createCSV``.
    """
    root = 'persons/'
    # Skip entries whose name contains ".git" (such as a .git directory).
    buckets = [root + name for name in os.listdir('persons') if ".git" not in name]

    rows = []
    for bucket in buckets:
        paths = [bucket + "/" + name for name in os.listdir(bucket)]
        for path in paths:
            # The ID is the file name minus its last five characters
            # (presumably the ".trig" extension - confirm against the data).
            person_id = path.rsplit('/', 1)[-1][:-5]
            if "P0RK" in person_id:
                continue
            run(path, person_id, rows)
    createCSV(rows)


if __name__ == "__main__":
    main()