# Masterthesis/data_augmentation.py at main · Steckdose007/Masterthesis · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import librosa
import numpy as np
import random
import librosa
import random


def add_gaussian_noise(audio_data, sample_rate=16000, noise_level=0.002):
    """
    Add Gaussian noise to the audio signal.

    Parameters:
    - audio_data: Numpy array of the audio signal.
    - sample_rate: Unused; accepted so that every augmentation can be called
      with the same (audio_data, sample_rate) arguments.
    - noise_level: Standard deviation of the noise.

    Returns:
    - A new array of the same shape as audio_data with the noise added. For
      floating-point input the result keeps the input dtype; for any other
      dtype the result follows NumPy's usual promotion rules.
    """
    # NOTE(review): the mean is 1e-5 rather than 0, which adds a tiny DC
    # offset -- kept as-is, confirm whether this is intentional.
    noise = np.random.normal(0.00001, noise_level, audio_data.shape)

    # np.random.normal always produces float64. Without this cast, float32
    # audio would be silently promoted to float64 by the addition below.
    dtype = getattr(audio_data, "dtype", None)
    if isinstance(dtype, np.dtype) and np.issubdtype(dtype, np.floating):
        noise = noise.astype(dtype)

    return audio_data + noise

def time_stretch(audio_data, sample_rate):
    """
    Randomly speed up or slow down the audio via librosa's time stretch.

    Signals with fewer than 2048 samples are not stretched; they are handed
    to add_gaussian_noise instead. Longer signals are passed to
    librosa.effects.time_stretch with a rate drawn uniformly from
    [0.8, 1.2].

    Parameters:
    - audio_data: Numpy array of the audio signal.
    - sample_rate: Sample rate of the audio signal; only forwarded to the
      noise fallback.

    Returns:
    - Augmented audio data.
    """
    # NOTE(review): 2048 presumably matches librosa's default FFT window
    # size -- confirm before changing.
    if audio_data.size < 2048:
        return add_gaussian_noise(audio_data, sample_rate)

    rate = random.uniform(0.8, 1.2)
    return librosa.effects.time_stretch(audio_data, rate=rate)

def pitch_shift(audio_data, sample_rate):
    """
    Randomly raise or lower the pitch of the audio via librosa.

    Signals with fewer than 2048 samples are not shifted; they are handed
    to add_gaussian_noise instead. Longer signals are passed to
    librosa.effects.pitch_shift with a step count drawn uniformly from
    [-3, 2].

    Parameters:
    - audio_data: Numpy array of the audio signal.
    - sample_rate: Sample rate of the audio signal.

    Returns:
    - Augmented audio data.
    """
    # NOTE(review): 2048 presumably matches librosa's default FFT window
    # size -- confirm before changing.
    if audio_data.size < 2048:
        return add_gaussian_noise(audio_data, sample_rate)

    # The range is asymmetric: up to 3 steps down, at most 2 steps up.
    semitones = random.uniform(-3, 2)
    return librosa.effects.pitch_shift(
        audio_data, sr=sample_rate, n_steps=semitones
    )

def random_crop_pad(audio_data, sample_rate):
    """
    Randomly trim the beginning and the end of the audio signal.

    Up to 10% of the samples (integer division) may be removed from each
    end. Despite the name, no padding is applied, so the result can be
    shorter than the input.

    Parameters:
    - audio_data: Sequence or numpy array of the audio signal.
    - sample_rate: Unused; present so all augmentations share one signature.

    Returns:
    - The slice of audio_data between the randomly chosen start and end.
    """
    total = len(audio_data)
    margin = total // 10

    # Draw the start first, then the end, so seeded runs stay reproducible.
    first = random.randint(0, margin)
    last = random.randint(total - margin, total)
    return audio_data[first:last]


def apply_augmentation(audio_data, sample_rate):
    """
    Run one randomly selected augmentation on the audio signal.

    The augmentation is picked with random.choice from: add_gaussian_noise,
    time_stretch, pitch_shift and random_crop_pad.

    Parameters:
    - audio_data: Numpy array of the audio signal.
    - sample_rate: Sample rate of the audio signal.

    Returns:
    - Augmented audio data.
    """
    candidates = (
        add_gaussian_noise,
        time_stretch,
        pitch_shift,
        random_crop_pad,
    )
    chosen = random.choice(candidates)
    return chosen(audio_data, sample_rate)