-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathpreprocessing.py
More file actions
executable file
·119 lines (90 loc) · 4 KB
/
preprocessing.py
File metadata and controls
executable file
·119 lines (90 loc) · 4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import librosa
import random
import torch
import numpy as np
from argparse import ArgumentParser
# Preprocessing logic of Honk
# Noise is turned off and time-shifting is no longer random
# usage :
# python ./preprocessing.py --file "data/test.wav"
# Sampling rate in Hz: audio is loaded at this rate, the mel filter bank is
# built for it, and it converts the time-shift from milliseconds to samples.
sample_rate = 16000
def print_data(name, data):
    """Print an array together with its dtype, shape and summary statistics.

    Args:
        name: Label printed in front of the dump.
        data: NumPy array to describe.

    Real-valued statistics are rounded to 10 decimals; complex-valued
    statistics are printed unrounded.
    """
    print(name, '\t', data.dtype, '\t', data.shape, '\n', data)

    low, high = np.min(data), np.max(data)
    mean, median = np.mean(data), np.median(data)

    # Branch on the array's dtype rather than on the value of its minimum:
    # np.iscomplex(np.min(data)) is False for a complex array whose minimum
    # happens to have a zero imaginary part (e.g. the spectrum of silence),
    # which used to send complex scalars into round().
    if not np.iscomplexobj(data):
        low, high, mean, median = (round(v, 10) for v in (low, high, mean, median))

    print('\trange : ( ', low, ' ~ ', high, ' )')
    print('\tmean : ', mean)
    print('\tmdedian : ', median, '\n')
def timeshift_audio(config, data):
    """Shift the signal in time by a random offset, keeping its length.

    The offset is drawn uniformly from +/- config["timeshift_ms"] converted
    to samples. A negative offset delays the signal (zeros enter at the
    front), a positive one advances it (zeros enter at the back).

    Args:
        config: Mapping providing "timeshift_ms".
        data: 1-D array of samples.

    Returns:
        The shifted array, the same length as the input.
    """
    max_shift = (sample_rate * config["timeshift_ms"]) // 1000
    shift = random.randint(-max_shift, max_shift)
    print('shift = ', shift, '\n')

    pad_front = -shift if shift < 0 else 0
    pad_back = shift if shift > 0 else 0
    padded = np.pad(data, (pad_front, pad_back), "constant")

    if pad_front:
        # Zeros were prepended: drop the same number of samples from the end.
        return padded[:len(padded) - pad_front]
    # Zeros were appended (or nothing was): drop the leading samples.
    return padded[pad_back:]
def preprocess_audio(data, config):
    """Turn a waveform into DCT-filtered log-mel features, printing each stage.

    Args:
        data: Audio samples, as passed to librosa's STFT.
        config: Mapping providing "n_fft", "hop_length", "n_mels", "fmin",
            "fmax" and the precomputed "dct_filters" matrix.

    Returns:
        float32 array with one row per STFT frame and one column per row of
        config["dct_filters"].
    """
    amp_spectrum = librosa.core.stft(
        data,
        n_fft=config["n_fft"],
        hop_length=config["hop_length"],
        pad_mode='constant',
    )
    print_data('amp_spectrum data', amp_spectrum)

    # np.abs(D[f, t]) is the magnitude of frequency bin f at frame t
    power_spectrum = np.abs(amp_spectrum) ** 2
    print_data('power spectrogram data', power_spectrum)

    # The mel spectrogram is assembled by hand (mel filter bank x power
    # spectrum) instead of calling librosa.feature.melspectrogram, so that the
    # STFT above can use pad_mode='constant'; librosa's default is reflection
    # padding.
    # sr is passed by keyword: recent librosa releases no longer accept it
    # positionally, and the keyword form works on older releases as well.
    mel_basis = librosa.filters.mel(
        sr=sample_rate,
        n_fft=config["n_fft"],
        n_mels=config["n_mels"],
        fmin=config["fmin"],
        fmax=config["fmax"],
    )
    print_data('mel_basis', mel_basis)

    data = np.dot(mel_basis, power_spectrum)
    print_data('melspectrogram data', data)

    # Log only the strictly positive energies; zero-energy bins stay at 0
    # instead of becoming -inf.
    positive = data > 0
    data[positive] = np.log(data[positive])
    print_data('logged melspectrogram data', data)

    # Apply the DCT filters to all frames in one matrix product and put the
    # frames on the first axis (previously a per-frame Python loop).
    data = np.matmul(config["dct_filters"], data).T.astype(np.float32)
    print_data('dct_filted data', data)
    return data
def preprocess(config, example, timeshift=True, silence=False):
    """Load one audio example and run the full preprocessing pipeline on it.

    Args:
        config: Mapping providing "input_length" plus the keys read by
            timeshift_audio and preprocess_audio.
        example: Path of the audio file to load; ignored when silence is True.
        timeshift: When True, pass the padded signal through timeshift_audio.
        silence: When True, use an all-zero signal of config["input_length"]
            samples instead of loading a file.

    Returns:
        The NumPy feature array produced by preprocess_audio.
    """
    in_len = config["input_length"]
    if silence:
        data = np.zeros(in_len, dtype=np.float32)
    else:
        # sr is passed by keyword: recent librosa releases no longer accept it
        # positionally, and the keyword form works on older releases as well.
        data = librosa.core.load(example, sr=sample_rate)[0]
    print_data('loaded data', data)

    # Right-pad with zeros up to in_len samples; longer clips are not cut.
    data = np.pad(data, (0, max(0, in_len - len(data))), "constant")
    print_data('padded data', data)

    if timeshift:
        data = timeshift_audio(config, data)
        print_data('shifted data', data)

    data = preprocess_audio(data, config)
    print_data('preprocessed data', data)
    return data
def _dct_filters(n_filters, n_input):
    """Return an (n_filters, n_input) orthonormal DCT-II basis.

    Uses librosa.filters.dct when the installed librosa still provides it and
    otherwise builds the basis with NumPy.
    NOTE(review): the fallback is meant to reproduce the output of the removed
    librosa helper -- confirm against reference values.
    """
    legacy_dct = getattr(librosa.filters, "dct", None)
    if legacy_dct is not None:
        return legacy_dct(n_filters, n_input)

    samples = np.arange(1, 2 * n_input, 2) * np.pi / (2.0 * n_input)
    orders = np.arange(n_filters)[:, np.newaxis]
    basis = np.cos(orders * samples) * np.sqrt(2.0 / n_input)
    # The zeroth (constant) basis vector has its own normalization.
    basis[:1, :] = 1.0 / np.sqrt(n_input)
    return basis


def main():
    """Parse the command line and preprocess a single audio file.

    The file is given with -f/--file and defaults to "data/test.wav". Every
    intermediate result is printed by the preprocessing functions.
    """
    parser = ArgumentParser()
    parser.add_argument("-f", "--file", dest="filename", default="data/test.wav")
    args = parser.parse_args()

    config = {
        "n_dct_filters": 40,
        "n_mels": 40,
        "n_fft": 512,  # window size (limited by browser)
        "hop_length": 160,
        "input_length": sample_rate,
        "timeshift_ms": 0,
        "fmin": 20,
        "fmax": 4000,
    }
    config["dct_filters"] = _dct_filters(config["n_dct_filters"], config["n_mels"])
    print_data('dct_filter', config["dct_filters"])

    preprocess(config, args.filename)


if __name__ == "__main__":
    main()