-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdata_augmentation.py
More file actions
59 lines (49 loc) · 1.9 KB
/
data_augmentation.py
File metadata and controls
59 lines (49 loc) · 1.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import librosa
import numpy as np
import random
import librosa
import random
def add_gaussian_noise(audio_data, sample_rate=16000, noise_level=0.002):
    """
    Overlay random Gaussian noise on the audio signal.

    Parameters:
    - audio_data: Numpy array of the audio signal.
    - sample_rate: Sample rate of the audio signal (not used by this function).
    - noise_level: Standard deviation of the generated noise.

    Returns:
    - A new array: audio_data plus a noise array of the same shape.
    """
    # The noise is centred on a tiny positive mean (1e-5) rather than exactly 0.
    perturbation = np.random.normal(
        loc=0.00001, scale=noise_level, size=audio_data.shape
    )
    noisy_audio = audio_data + perturbation
    return noisy_audio
def time_stretch(audio_data, sample_rate):
    """
    Speed up or slow down the audio by a random factor, keeping the pitch.

    Parameters:
    - audio_data: Numpy array of the audio signal.
    - sample_rate: Sample rate of the audio signal (only forwarded to the
      noise fallback).

    Returns:
    - The time-stretched audio, or a noise-augmented copy for short clips.
    """
    # Clips shorter than 2048 samples are not stretched; they receive
    # Gaussian noise instead.
    # NOTE(review): 2048 presumably mirrors librosa's default STFT window - confirm.
    if audio_data.size < 2048:
        return add_gaussian_noise(audio_data, sample_rate)

    # Rate is drawn uniformly from [0.8, 1.2].
    rate = random.uniform(0.8, 1.2)
    return librosa.effects.time_stretch(audio_data, rate=rate)
def pitch_shift(audio_data, sample_rate):
    """
    Raise or lower the pitch of the audio by a random number of semitones.

    Parameters:
    - audio_data: Numpy array of the audio signal.
    - sample_rate: Sample rate of the audio signal.

    Returns:
    - The pitch-shifted audio, or a noise-augmented copy for short clips.
    """
    # Clips shorter than 2048 samples are not shifted; they receive
    # Gaussian noise instead.
    if audio_data.size < 2048:
        return add_gaussian_noise(audio_data, sample_rate)

    # Asymmetric range: anywhere from 3 semitones down to 2 semitones up.
    semitones = random.uniform(-3, 2)
    return librosa.effects.pitch_shift(
        audio_data, sr=sample_rate, n_steps=semitones
    )
def random_crop_pad(audio_data, sample_rate):
    """
    Trim a random amount from both ends of the audio signal.

    Up to 10% of the length is removed from the start and, independently,
    up to 10% from the end. No padding is applied, so the result is never
    longer than the input.

    Parameters:
    - audio_data: Sequence of audio samples (sliced along its first axis).
    - sample_rate: Sample rate of the audio signal (not used by this function).

    Returns:
    - The slice of audio_data between the two randomly chosen cut points.
    """
    total = len(audio_data)
    margin = total // 10  # largest number of samples removable from either end

    # Draw the start cut first, then the end cut; both bounds are inclusive.
    first_kept = random.randint(0, margin)
    end_exclusive = random.randint(total - margin, total)
    return audio_data[first_kept:end_exclusive]
def apply_augmentation(audio_data, sample_rate):
    """
    Run one randomly selected augmentation on the audio signal.

    The candidate is picked uniformly from: Gaussian noise, time stretch,
    pitch shift, and random crop. The returned signal may differ in length
    from the input (for example after cropping).

    Parameters:
    - audio_data: Numpy array of the audio signal.
    - sample_rate: Sample rate of the audio signal.

    Returns:
    - Augmented audio data.
    """
    candidates = (
        add_gaussian_noise,
        time_stretch,
        pitch_shift,
        random_crop_pad,
    )
    chosen = random.choice(candidates)
    return chosen(audio_data, sample_rate)