# load_data.py
#
# Loads the TIMIT lookup tables (phonemes, words, speaker info, phone map)
# and the train/valid/test splits, and provides helpers for converting
# between sentence, word and phoneme indices.
import numpy
import cPickle
# This should point to the contents of the directory /data/lisa/data/timit/
# from the lab network.
timit_readable_location = "../timit/readable"


def _load_pickle(filename):
    """Unpickle ``filename`` from ``timit_readable_location`` and close the file.

    The file is opened in binary mode, which is what pickle data requires on
    every platform, and the handle is released as soon as loading finishes.
    """
    # NOTE: unpickling can execute arbitrary code. Only point
    # timit_readable_location at data you trust.
    with open(timit_readable_location + '/' + filename, 'rb') as handle:
        return cPickle.load(handle)


# Lookup tables, indexed by phoneme number / word number respectively.
phonemes = numpy.array(_load_pickle('phonemes.pkl'))
words = numpy.array(_load_pickle('words.pkl'))
# NOTE(review): the .tolist().toarray() chain suggests the .npy file wraps a
# single sparse-matrix object that is densified here -- confirm against the
# script that wrote spkrinfo.npy.
spkrinfo = numpy.load(timit_readable_location + '/spkrinfo.npy').tolist().toarray()
phone_map = _load_pickle('phone_map.pkl')
def word_num_to_word_str(idx):
    """Return the entry (or entries) of the module-level ``words`` array at ``idx``.

    ``idx`` is passed straight to the array's indexing operator, so anything
    that array accepts as an index (a single number or a sequence of numbers)
    is accepted here.
    """
    word_str = words[idx]
    return word_str
class TimitDataSet(object):
    """One split of the TIMIT data, loaded from a family of ``.npy`` files.

    Terminology used by the methods below:

    * a *sentence index* selects one recorded sentence (a row of ``x_raw``,
      ``seq_to_phn``, ``seq_to_wrd``);
    * a *word index* / *phoneme index* selects one recorded utterance of a
      word / phoneme (a row of ``wrd`` / ``phn``);
    * a *word number* / *phoneme number* identifies the word / phoneme itself
      (e.g. 'h#' or 'eng') and is stored in column 2 of ``wrd`` / ``phn``.

    Columns 0 and 1 of ``wrd`` and ``phn`` hold the start and end offsets of
    the utterance inside its sentence's waveform.
    """

    def __init__(self, path_prefix):
        """Load the arrays stored at ``path_prefix + '_<name>.npy'``.

        path_prefix -- file path up to, but not including, the ``_<name>.npy``
                       suffix (for example ``'../timit/readable/train'``).
        """
        # NOTE(review): if any of these files hold object arrays, recent numpy
        # releases will refuse to load them unless allow_pickle=True is
        # passed -- confirm against the numpy version in use.
        # Sound data
        self.x_raw = numpy.load(path_prefix + '_x_raw.npy')
        # List of every utterance of a phoneme
        self.phn = numpy.load(path_prefix + '_phn.npy')
        # Which phonemes appear in each recorded sentence
        self.seq_to_phn = numpy.load(path_prefix + '_seq_to_phn.npy')
        # Which words appear in each recorded sentence
        self.seq_to_wrd = numpy.load(path_prefix + '_seq_to_wrd.npy')
        # Information on each word that appears
        self.wrd = numpy.load(path_prefix + '_wrd.npy')
        # Which speaker recorded each sentence
        self.spkr = numpy.load(path_prefix + '_spkr.npy')

    # ---- Sizes -----------------------------------------------------------

    def number_of_recorded_sentences(self):
        """Return the number of entries in ``x_raw``."""
        return len(self.x_raw)

    def number_of_recorded_phonemes(self):
        """Return the number of rows in ``phn``."""
        return len(self.phn)

    def number_of_recorded_words(self):
        """Return the number of rows in ``wrd``."""
        return len(self.wrd)

    def number_of_distinct_speakers(self):
        """Return how many different values occur in ``spkr``."""
        # The original called a bare ``unique`` that is not defined in this
        # module; numpy.unique is the function that was intended.
        return len(numpy.unique(self.spkr))

    # ---- Sentences -------------------------------------------------------

    def sentence_idx_to_wave(self, idx):
        """Return the raw sound data of sentence ``idx``."""
        return self.x_raw[idx]

    def sentence_idx_to_word_idcs(self, idx):
        """Return the word indices belonging to sentence ``idx``.

        ``seq_to_wrd[idx]`` is a ``(first, last)`` pair; ``last`` is exclusive.
        """
        first_word_idx, last_word_idx = self.seq_to_wrd[idx]
        return range(first_word_idx, last_word_idx)

    def sentence_idx_to_word_nums(self, idx):
        """Return the word numbers (column 2 of ``wrd``) of sentence ``idx``."""
        return self.wrd[self.sentence_idx_to_word_idcs(idx)][:, 2]

    def sentence_idx_to_words(self, idx):
        """Return the word strings of sentence ``idx``.

        Relies on the module-level ``word_num_to_word_str`` helper.
        """
        return word_num_to_word_str(self.sentence_idx_to_word_nums(idx))

    # For lack of a better terminology I call a "phoneme index"
    # the index of a recording of an utterance of a phoneme
    # in the phn array. I call the number that identifies
    # a specific phoneme (e.g. 'h#' or 'eng') the "phoneme number".

    def sentence_idx_to_phoneme_idcs(self, idx):
        """Return the phoneme indices belonging to sentence ``idx``.

        ``seq_to_phn[idx]`` is a ``(first, last)`` pair; ``last`` is exclusive.
        """
        first_phoneme_idx, last_phoneme_idx = self.seq_to_phn[idx]
        return range(first_phoneme_idx, last_phoneme_idx)

    def sentence_idx_to_phoneme_nums(self, idx):
        """Return the phoneme numbers (column 2 of ``phn``) of sentence ``idx``."""
        return self.phn[self.sentence_idx_to_phoneme_idcs(idx)][:, 2]

    def sentence_idx_to_phoneme_strs(self, idx):
        """Return the phoneme strings of sentence ``idx``.

        Looks the phoneme numbers up in the module-level ``phonemes`` array.
        """
        return phonemes[self.sentence_idx_to_phoneme_nums(idx)]

    # ---- Phonemes --------------------------------------------------------

    def phoneme_idx_to_phoneme_num(self, idx):
        """Return the phoneme number stored for phoneme index ``idx``."""
        return self.phn[idx][2]

    def phoneme_idx_to_phoneme_str(self, idx):
        """Return the phoneme string for phoneme index ``idx``.

        Looks the phoneme number up in the module-level ``phonemes`` array.
        """
        return phonemes[self.phoneme_idx_to_phoneme_num(idx)]

    def phoneme_idx_to_sentence_idx(self, idx):
        """Return the index of the sentence in which phoneme ``idx`` occurs.

        Raises IndexError when no sentence's ``[first, last)`` range in
        ``seq_to_phn`` contains ``idx``.
        """
        # The original relied on a bare ``find`` that is not defined in this
        # module. Scanning the ranges directly needs no helper and stops at
        # the first match.
        for sentence_idx, (first, last) in enumerate(self.seq_to_phn):
            if first <= idx < last:
                return sentence_idx
        raise IndexError("phoneme index %r does not belong to any sentence" % (idx,))

    def phoneme_idx_to_offsets(self, idx):
        """Return the (start, end) offsets of phoneme ``idx`` in its sentence."""
        return self.phn[idx][0:2]

    def phoneme_idx_to_wave(self, idx):
        """Return the slice of the sentence waveform covering phoneme ``idx``."""
        sent_wave = self.sentence_idx_to_wave(self.phoneme_idx_to_sentence_idx(idx))
        start, end = self.phoneme_idx_to_offsets(idx)
        return sent_wave[start:end]

    # ---- Words -----------------------------------------------------------

    def word_idx_to_offsets(self, idx):
        """Return the (start, end) offsets stored for word index ``idx``."""
        return self.wrd[idx][0:2]

    def word_idx_to_word_num(self, idx):
        """Return the word number stored for word index ``idx``."""
        return self.wrd[idx][2]
# Load the three dataset splits when the module is imported, in the order
# train, valid, test. A generator expression is used so that no loop
# variable is left behind in the module namespace.
train, valid, test = (
    TimitDataSet(timit_readable_location + suffix)
    for suffix in ('/train', '/valid', '/test')
)