forked from Picovoice/speech-to-text-benchmark
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbenchmark.py
More file actions
74 lines (56 loc) · 2.75 KB
/
benchmark.py
File metadata and controls
74 lines (56 loc) · 2.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import argparse
import time

import editdistance
from num2words import num2words

from dataset import *
from engine import *
if __name__ == '__main__':
    # Benchmark an ASR engine on the Tuda-De dataset: transcribe every sample,
    # accumulate word-level edit distance, and separately count "composite"
    # errors where the hypothesis split one reference word into several
    # adjacent words (common for German compounds).
    parser = argparse.ArgumentParser()
    parser.add_argument('--engine_type', type=str, required=True)
    args = parser.parse_args()

    print('Engine type is %s' % str(args.engine_type))

    dataset = Dataset.create('tudade')
    print('loaded %s with %.2f hours of data' % (str(dataset), dataset.size_hours()))

    engine = ASREngine.create(ASREngines[args.engine_type])
    print('created %s engine' % str(engine))

    word_error_count = 0
    word_count = 0
    # Each entry: (reference word, list of hypothesis words that concatenate to it).
    composite_errors = list()
    composite_error_count = 0

    millis = int(round(time.time() * 1000))

    for i in range(dataset.size()):
        print("sample %s of %s" % (str(i + 1), str(dataset.size())))
        path, ref_transcript = dataset.get(i)
        transcript = engine.transcribe(path)

        ref_words = ref_transcript.strip('\n ').lower().split()
        words = transcript.strip('\n ').lower().split()

        # The cleaned Tuda-De references spell numbers out; normalize digit
        # tokens in the hypothesis the same way so they can match.
        if isinstance(dataset, TudaDeDataset) and dataset.cleaned:
            for j, word in enumerate(words):
                if word.isnumeric():
                    words[j] = num2words(word, lang='de')

        # Detect composite errors: a reference word whose exact spelling is the
        # concatenation of two or more consecutive hypothesis words.
        for ref_word in ref_words:
            for j, word in enumerate(words):
                if ref_word.startswith(word) and ref_word != word and j + 1 < len(words):
                    comp = [word]
                    for following in range(j + 1, len(words)):
                        if not ref_word.startswith(''.join(comp) + words[following]):
                            break
                        # Bug fix: original used `is` (identity) here; use `==`
                        # so the loop stops once the full word is assembled.
                        if ''.join(comp) == ref_word:
                            break
                        comp.append(words[following])
                    if ''.join(comp) == ref_word:
                        composite_error_count += len(comp)
                        composite_errors.append((ref_word, comp))

        # Word-level Levenshtein distance between reference and hypothesis.
        distance = editdistance.eval(ref_words, words)
        print("Ref: %s" % ref_words)
        print("Got: %s" % words)
        print("Distance: %s" % str(distance))
        word_error_count += distance
        word_count += len(ref_words)

    print('word count: %d' % word_count)
    print('word error count : %d' % word_error_count)
    print('Composite error count : %d' % composite_error_count)
    if word_count > 0:
        print('word error rate without composite errors : %.2f' % (
                100 * float(word_error_count - len(composite_errors)) / word_count))
        print('word error rate : %.2f' % (100 * float(word_error_count) / word_count))
    else:
        # Guard against an empty dataset instead of crashing with ZeroDivisionError.
        print('word error rate : n/a (no reference words)')

    end_millis = int(round(time.time() * 1000))
    # Bug fix: original passed the value as a second print argument, so the
    # literal "%s" was printed instead of being substituted.
    print("Start: %s" % str(millis))
    print("End: %s" % str(end_millis))