-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathReference_variety_module.py
More file actions
237 lines (229 loc) · 12.4 KB
/
Reference_variety_module.py
File metadata and controls
237 lines (229 loc) · 12.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
import sys
sys.path.append(r'C:\\Users\\u1269857\\AppData\\Local\\Continuum\\Anaconda3\\Lib\\site-packages')
sys.path.append('C:\\Program Files\\Anaconda3\\Lib\\site-packages')
import xlrd
import re
import numpy
# Sections of the soup that can contain a person, in lookup order.
_PERSON_SECTIONS = ('lineups', 'substitutes', 'managers')

# Tags considered by the case-insensitive last-name fallback lookup.
_PERSON_NAME_TAGS = ['name', 'fullname', 'goalcomshownname']

# (pattern, Dutch role noun) pairs, tested in this order against the text of
# the person's <wikiposition> tag (case-insensitively); first match wins.
_POSITION_ROLES = (
    (r'\bgoal', 'doelman'),
    (r'back|defender|sweeper', 'verdediger'),
    (r'midfielder|winger', 'middenvelder'),
    (r'forward|striker|attacker', 'aanvaller'),
)


def _find_person_node(soup, player, name_tags_only):
    """Locate *player* in the soup and return ``(node, section)``.

    Each section in ``_PERSON_SECTIONS`` is first searched for a text node
    equal to *player*; if none matches, the sections are searched again with
    a regular expression built from the last whitespace-separated token of
    *player*. With ``name_tags_only`` true the fallback is case-insensitive
    and restricted to ``_PERSON_NAME_TAGS``; otherwise it is case-sensitive
    and matches any text node.

    Prints a message and calls ``sys.exit(1)`` when nothing matches.
    """
    for section in _PERSON_SECTIONS:
        try:
            # A missing section or a missing match yields None, whose
            # attribute access raises AttributeError -> try the next section.
            return soup.find(section).find(text=player).parent.parent, section
        except AttributeError:
            continue
    for section in _PERSON_SECTIONS:
        try:
            container = soup.find(section)
            if name_tags_only:
                # NOTE(review): with a tag filter BeautifulSoup returns the
                # matching Tag rather than its text node, so .parent.parent
                # may climb one level higher than in the exact-match lookup
                # above -- confirm against the actual markup layout.
                hit = container.find(
                    _PERSON_NAME_TAGS,
                    text=re.compile(player.split()[-1], re.I))
            else:
                hit = container.find(text=re.compile(player.split()[-1]))
            return hit.parent.parent, section
        except AttributeError:
            continue
    print('Named Entity Problem for: ' + player)
    sys.exit(1)


def _person_name(node):
    """Return the text of the node's <name> tag, else of its <fullname> tag."""
    try:
        return node.find('name').text
    except AttributeError:
        return node.find('fullname').text


def _person_role(node, is_manager):
    """Return the Dutch role noun for the person, or None if none applies."""
    if is_manager:
        return 'manager'
    position = node.find('wikiposition').text
    for pattern, role in _POSITION_ROLES:
        if re.search(pattern, position, re.I):
            return role
    return None


def PlayerReferenceModel(player, soup, homeaway, gap, **kwargs):
    """Pick a referring expression for *player* that varies from earlier ones.

    Candidates are the full name and the last name (weight 10 each) and the
    role noun followed by the last name or the full name (weight 5 each).
    Candidates already present among the previous references to this player
    are dropped; one of the remaining candidates is drawn with
    ``numpy.random.choice`` using the normalised weights. If every candidate
    has been used before, the full name is returned.

    Args:
        player: Name of the player or manager as used in the event data.
        soup: Parsed match document; must offer BeautifulSoup-style ``find``.
        homeaway: Unused; kept for a uniform reference-model signature.
        gap: Unused; kept for a uniform reference-model signature.
        **kwargs: Must contain ``previousgaplist``, the previously filled
            gaps. Tuples are read as ``(entity, reference)``; any other
            entry is checked for containing the player's listed name.

    Returns:
        A tuple ``(player, chosen_reference)``.

    Raises:
        KeyError: If ``previousgaplist`` is not passed.
        SystemExit: If the player cannot be found in the soup.
    """
    previousgaps = kwargs['previousgaplist']
    previousreference = []
    listed_name = None
    # NOTE(review): the original indentation was lost; the list-membership
    # check below is assumed to apply to non-tuple gaps only.
    for previousgap in previousgaps:
        if isinstance(previousgap, tuple):
            # Tuples are previously processed named entities.
            if previousgap[0] == player:
                previousreference.append(previousgap[1])
            continue
        # The player may have been mentioned inside an enumeration (all goal
        # scorers, all booked players, ...), which uses the listed name.
        # Resolve that name once, on first need, instead of once per gap.
        if listed_name is None:
            listed_node, _ = _find_person_node(soup, player, False)
            listed_name = _person_name(listed_node)
        try:
            if listed_name in previousgap:
                previousreference.append(listed_name)
        except TypeError:
            # The gap does not support membership tests (e.g. a number).
            pass

    # Look up the person's record; only managers live in the managers section.
    node, section = _find_person_node(soup, player, True)
    is_manager = section == 'managers'

    fullname = _person_name(node)
    lastname = None
    name_parts = fullname.split()
    if len(name_parts) > 1:
        lastname = ' '.join(name_parts[1:])
        # Capitalise a leading lowercase particle of the last name.
        if lastname[0].islower():
            lastname = lastname[0].upper() + lastname[1:]

    # Candidate order is kept stable so seeded draws stay reproducible.
    candidates = [(fullname, 10)]
    if lastname is not None:
        candidates.append((lastname, 10))
    role = _person_role(node, is_manager)
    if role is not None:
        if lastname is not None:
            candidates.append((role + ' ' + lastname, 5))
        candidates.append((role + ' ' + fullname, 5))

    # Keep only expressions that were not used for this player before.
    options = [(text, weight) for text, weight in candidates
               if text not in previousreference]
    if not options:
        # Every variant is exhausted; repeating the full name is better than
        # letting numpy.random.choice fail on an empty candidate list.
        options = [(fullname, 10)]

    elems = [text for text, _ in options]
    total = sum(weight for _, weight in options)
    norm = [float(weight) / total for _, weight in options]
    namechoice = numpy.random.choice(elems, p=norm)
    return (player, namechoice)
def _club_manager_name(soup, side):
    """Return the manager name for *side* ('home' or 'away'), or None.

    The text of the <name> tag is preferred, then that of <fullname>; any
    missing element along the way simply moves on to the next option.
    """
    for tag in ('name', 'fullname'):
        try:
            return soup.find('managers').find(side).find(tag).text
        except AttributeError:
            continue
    return None


def _club_city(club):
    """Return the city listed for *club* in the nicknames workbook, or None.

    Reads 'Clubs and Nicknames.xlsx' relative to the current working
    directory on every call; the club is taken from column index 1 and the
    city from column index 3 of the first sheet. Later rows override
    earlier rows that carry the same club name.
    """
    # NOTE(review): xlrd 2.0+ dropped .xlsx support, so this presumably
    # relies on an older xlrd being installed -- confirm the pinned version.
    workbook = xlrd.open_workbook(r'Clubs and Nicknames.xlsx')
    worksheet = workbook.sheet_by_name(workbook.sheet_names()[0])
    citydict = {}
    for curr_row in range(worksheet.nrows):
        citydict[worksheet.cell_value(curr_row, 1)] = worksheet.cell_value(curr_row, 3)
    if club in citydict:
        return citydict[club]
    return None


def ClubReferenceModel(club, soup, homeaway, gap, **kwargs):
    """Pick a referring expression for *club* that varies from earlier ones.

    When the club has no earlier reference in ``previousgaplist`` the club
    name itself is returned. Otherwise the candidates are the club name
    (weight 50), 'de thuisploeg' or 'de uitploeg', 'de ploeg van manager X'
    and 'de club uit <city>' (weight 10 each); candidates already used for
    this club are dropped and one of the rest is drawn with
    ``numpy.random.choice`` using the normalised weights. If every candidate
    has been used before, the club name is returned.

    Args:
        club: Club name as used in the event data.
        soup: Parsed match document; must offer BeautifulSoup-style ``find``.
            It is not consulted when the club has no earlier reference.
        homeaway: Unused; kept for a uniform reference-model signature.
        gap: Unused; kept for a uniform reference-model signature.
        **kwargs: Must contain ``previousgaplist``, the previously filled
            gaps. Only tuples, read as ``(entity, reference)``, are used.

    Returns:
        A tuple ``(club, chosen_reference)``.

    Raises:
        KeyError: If ``previousgaplist`` is not passed.
    """
    previousgaps = kwargs['previousgaplist']
    # Tuples are previously processed named entities; keep this club's ones.
    previousreference = [
        previousgap[1] for previousgap in previousgaps
        if isinstance(previousgap, tuple) and previousgap[0] == club
    ]

    if not previousreference:
        # First mention: just use the name of the club.
        options = [(club, 10)]
    else:
        # NOTE(review): the original indentation was lost; the alternatives
        # below (including the workbook lookup) are assumed to apply only
        # when the club has been mentioned before.
        candidates = [(club, 50)]
        if soup.find('highlights').find('home').find(text=club):
            side, side_label = 'home', 'de thuisploeg'
        else:
            side, side_label = 'away', 'de uitploeg'
        candidates.append((side_label, 10))
        # The manager is looked up on the club's own side for both the
        # <name> and the <fullname> attempt.
        manager = _club_manager_name(soup, side)
        if manager is not None:
            candidates.append(('de ploeg van manager ' + manager, 10))
        city = _club_city(club)
        if city is not None:
            candidates.append(('de club uit ' + city, 10))

        # Keep only expressions that were not used for this club before.
        options = [(text, weight) for text, weight in candidates
                   if text not in previousreference]
        if not options:
            # Every variant is exhausted; repeating the club name is better
            # than letting numpy.random.choice fail on an empty list.
            options = [(club, 10)]

    elems = [text for text, _ in options]
    total = sum(weight for _, weight in options)
    norm = [float(weight) / total for _, weight in options]
    namechoice = numpy.random.choice(elems, p=norm)
    return (club, namechoice)
#print ClubReferenceModel('Ajax', soup, homeaway, gap, event=event, previousgaplist=previousgaplist)