From e76132320e5ccff14a60d4da415dad8ebc3f0aa4 Mon Sep 17 00:00:00 2001
From: aryanKaga <162778626+aryanKaga@users.noreply.github.com>
Date: Wed, 23 Jul 2025 12:49:48 +0530
Subject: [PATCH] Add files via upload

---
 create_dataset.py | 99 +++++++++++++++++++++++++++++++++++++++++++++++
 stft.py           | 41 ++++++++++++++++++++
 2 files changed, 140 insertions(+)
 create mode 100644 create_dataset.py
 create mode 100644 stft.py

diff --git a/create_dataset.py b/create_dataset.py
new file mode 100644
index 00000000..b62a465a
--- /dev/null
+++ b/create_dataset.py
@@ -0,0 +1,99 @@
+"""Pair noisy and clean speech files and build a magnitude-spectrogram dataset.
+
+Clean and noisy files are matched on the numeric id embedded in their names
+(``clnsp<N>.wav``).  ``dataset_list[N]`` holds the noisy file names under
+"input" and the clean file names under "output".
+"""
+import os
+import re
+import threading
+import time
+
+import numpy as np
+import psutil
+
+from stft import convert_to_magnitude, get_duration_difference_ms
+
+# Matches the clean-speech id embedded in both clean and noisy file names.
+_CLEAN_ID_PATTERN = re.compile(r'clnsp(\d+)\.wav')
+
+dataset = {"input": [], "output": []}
+
+terminal_direc = os.getcwd()
+
+# Directory for clean files.
+clean_speech_dir = os.path.join(terminal_direc, 'CleanSpeech_training')
+# Directory for noisy files.
+noisy_speech_dir = os.path.join(terminal_direc, 'data10', 'NoisySpeech_training')
+
+# Sorted so the dataset order does not depend on the file system's listing order.
+clean_speech = sorted(os.listdir(clean_speech_dir))
+noisy_speech = sorted(os.listdir(noisy_speech_dir))
+
+# The total length depends on the unique clean speech files available as data.
+total_length_list = len(clean_speech)
+
+
+def _clean_id(filename):
+    """Return the clean-speech id embedded in *filename*, or None if absent."""
+    match = _CLEAN_ID_PATTERN.search(filename)
+    return int(match.group(1)) if match else None
+
+
+def _group_by_clean_id(clean_files, noisy_files, min_length=0):
+    """Group file names by clean-speech id.
+
+    Returns a list indexed by id; each entry maps "input" to the noisy file
+    names and "output" to the clean file names carrying that id.  Names with
+    no id (for example stray non-audio files) are skipped instead of raising.
+    The list covers the largest id seen and is never shorter than *min_length*,
+    so non-contiguous numbering cannot index past its end.
+    """
+    tagged = {
+        "output": [(_clean_id(name), name) for name in clean_files],
+        "input": [(_clean_id(name), name) for name in noisy_files],
+    }
+    ids = [i for pairs in tagged.values() for i, _ in pairs if i is not None]
+    size = max([min_length] + [i + 1 for i in ids])
+    grouped = [{"input": [], "output": []} for _ in range(size)]
+    for key, pairs in tagged.items():
+        for file_id, name in pairs:
+            if file_id is not None:
+                grouped[file_id][key].append(name)
+    return grouped
+
+
+dataset_list = _group_by_clean_id(clean_speech, noisy_speech, total_length_list + 1)
+
+# Kept for backward compatibility; nothing in this file reads it.
+maxindex = 0
+
+
+def create_mag_spectra_dataset():
+    """Build aligned noisy/clean magnitude-spectrogram pairs.
+
+    Each clean recording in ``dataset_list`` is converted once and paired with
+    every noisy recording that shares its id.  Both members of a pair are
+    cropped along axis 1 to the shorter of the two so their shapes match.
+
+    Returns:
+        dict: ``{"input": [...], "output": [...]}``; following the convention
+        of ``dataset_list``, ``input[i]`` is a noisy magnitude spectrogram and
+        ``output[i]`` is the clean one sharing its id.
+    """
+    mag_data = {"input": [], "output": []}
+
+    for entry in dataset_list:
+        for clean_name in entry['output']:
+            clean_mag = convert_to_magnitude(os.path.join(clean_speech_dir, clean_name))
+
+            for noisy_name in entry['input']:
+                noisy_mag = convert_to_magnitude(os.path.join(noisy_speech_dir, noisy_name))
+
+                # Crop into fresh values: overwriting clean_mag here would keep
+                # shrinking it for every later noisy file of the same clip.
+                min_width = min(clean_mag.shape[1], noisy_mag.shape[1])
+                mag_data['input'].append(noisy_mag[:, :min_width])
+                mag_data['output'].append(clean_mag[:, :min_width])
+
+    return mag_data
diff --git a/stft.py b/stft.py
new file mode 100644
index 00000000..a8b6802c
--- /dev/null
+++ b/stft.py
@@ -0,0 +1,41 @@
+"""Helpers for loading audio files and converting them to STFT form."""
+import os
+
+import librosa
+import numpy as np
+import soundfile
+
+working_direc = os.getcwd()
+
+
+def convert_to_stft(path, sr=16000):
+    """Load the audio at *path* at *sr* Hz and return its complex STFT."""
+    amp, _ = librosa.load(path, sr=sr)
+    return librosa.stft(amp)
+
+
+def convert_to_magnitude(path, sr=16000):
+    """Return the magnitude of the STFT of the audio at *path*, loaded at *sr* Hz."""
+    return np.abs(convert_to_stft(path, sr=sr))
+
+
+def get_duration_difference_ms(file1, file2, sr=16000):
+    """Print the lengths of two audio files and return their difference in ms.
+
+    Both files are loaded at *sr* Hz; the result is the absolute difference
+    between their sample counts converted to milliseconds.
+    """
+    y1, _ = librosa.load(file1, sr=sr)
+    y2, _ = librosa.load(file2, sr=sr)
+
+    len1 = len(y1)
+    len2 = len(y2)
+
+    diff_samples = abs(len1 - len2)
+    diff_ms = (diff_samples / sr) * 1000
+
+    print(f"Length 1: {len1} samples ({len1/sr:.3f} s)")
+    print(f"Length 2: {len2} samples ({len2/sr:.3f} s)")
+    print(f"Difference: {diff_samples} samples = {diff_ms:.2f} ms")
+
+    return diff_ms