-
Notifications
You must be signed in to change notification settings - Fork 83
Expand file tree
/
Copy pathcharacterDefinitions.py
More file actions
42 lines (32 loc) · 2.26 KB
/
characterDefinitions.py
File metadata and controls
42 lines (32 loc) · 2.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#--Can add new definitions here to apply this code to other problems or datasets.--
import numpy as np
def getHandwritingCharacterDefinitions():
    """
    Build the lookup tables that describe the handwriting character set.

    Returns:
        charDef (dict) with the following entries, all indexed in the same
        character order (a-z followed by the five punctuation symbols):
            'charList'      - full name of each of the 31 characters
            'charListAbbr'  - single-symbol abbreviation of each character
            'charLen'       - int32 array, template length of each character
                              (in # of 10 ms bins)
            'penStart'      - starting height of the pen tip for each character
                              (0 = bottom of the line, 1 = top)
            'strToCharIdx'  - dict mapping an abbreviation to its character index
            'kaldiOrder'    - the 32 symbols in the order kaldi
                              (i.e., the language model) expects
            'idxToKaldi'    - int32 index array that re-orders outputs into kaldi order
    """
    #The 26 lowercase letters are shared by every symbol list below.
    letters = [chr(code) for code in range(ord('a'), ord('z') + 1)]

    #Full names and one-symbol abbreviations of all 31 characters (same ordering in both lists).
    fullNames = letters + ['greaterThan', 'comma', 'apostrophe', 'tilde', 'questionMark']
    abbreviations = letters + ['>', ',', "'", '~', '?']

    #Template length of each character, in # of 10 ms bins.
    #These were hand-defined based on visual inspection of the reconstructed pen trajectories.
    templateLengths = np.array([
        99, 91, 70, 104, 98, 125, 110, 104, 79, 92,      #a-j
        127, 68, 132, 90, 84, 113, 104, 74, 86, 110,     #k-t
        86, 83, 110, 103, 115, 100,                      #u-z
        82, 77, 116, 71, 110,                            #> , ' ~ ?
    ], dtype=np.int32)

    #Starting location of the pen tip for each character (0 = bottom of the line, 1 = top).
    penStartHeights = [
        0.25, 1, 0.5, 0.5, 0.25, 1.0, 0.25, 1.0, 0.5, 0.5,   #a-j
        1, 1, 0.5, 0.5, 0.25, 0.5, 0.25, 0.5, 0.5, 1,        #k-t
        0.5, 0.5, 0.5, 0.5, 0.5, 0.5,                        #u-z
        0.5, 0.25, 1, 0.5, 1,                                #> , ' ~ ?
    ]

    #Reverse lookup: abbreviation string -> position in the character lists.
    abbrToIdx = {symbol: position for position, symbol in enumerate(abbreviations)}

    #Ordering of symbols that kaldi (i.e., the language model) expects.
    kaldiSymbols = ['<ctc>', '>', "'", ',', '.', '?'] + letters

    #Re-indexing to match kaldi order (e.g., outputs[:,:,charDef['idxToKaldi']] places the
    #output in kaldi order). Entry k is the character index that feeds kaldi slot k; the '.'
    #slot is fed by index 29 ('tilde'). Index 31 lies past the 31 characters - presumably the
    #CTC blank output channel; confirm against the network output layout.
    kaldiReindex = np.array([31, 26, 28, 27, 29, 30] + list(range(26)), dtype=np.int32)

    return {
        'charList': fullNames,
        'charListAbbr': abbreviations,
        'charLen': templateLengths,
        'penStart': penStartHeights,
        'strToCharIdx': abbrToIdx,
        'kaldiOrder': kaldiSymbols,
        'idxToKaldi': kaldiReindex,
    }