From 0a1364d46fa4d0b385882d3042f638bca171ec2f Mon Sep 17 00:00:00 2001 From: Katie Garwood Date: Fri, 6 Jun 2025 14:57:34 -0700 Subject: [PATCH 01/13] add script to create 'loud' segments needs some work to go through numerous sound files and give better resulting filenames. but this script takes a sound file and then calculates the rms across a certain number of frames you tell it the size and hop for, giving you an array of rms values. It then calculates the average rms, and multiplies it by 1.5 and creates 3 second segments of audio centered around the frame that exceeded the rms threshold. it will not create overlapping segments. there's definitely some potential issues with this but for now seems to be able to create segments based on the relative loudness of the whole sample. for example it hasn't had to handle creating a segment where the values that exceed the threshold equate for more than the specified 3s for clip creation. --- tools/extract_noise.py | 88 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 tools/extract_noise.py diff --git a/tools/extract_noise.py b/tools/extract_noise.py new file mode 100644 index 0000000..0d632cf --- /dev/null +++ b/tools/extract_noise.py @@ -0,0 +1,88 @@ +import librosa +import librosa.display +import matplotlib.pyplot as plt +import numpy as np +import soundfile as sf + +FILENAME = '/mnt/projects/PandaBear/acoustics/zoo_recordings/2025-05-23/SMM1/GPBZ_SMM01_20250523_103000.wav' # change to path of your sound file +FRAME_LENGTH = 4096 +HOP_LENGTH = 2048 +NUM_SECONDS_OF_SLICE = 3 + +sound, sr = librosa.load(FILENAME, sr=None) +print(f"sample rate: {sr}") + +clip_rms = librosa.feature.rms(y=sound, + frame_length=FRAME_LENGTH, + hop_length=HOP_LENGTH) + +clip_rms = clip_rms.squeeze() +print(f"clip RMS: {clip_rms}, length of clip rms: {clip_rms.size}") +peak_rms_index = clip_rms.argmax() +print(f"Peak RMS index: {peak_rms_index}, value: {clip_rms[peak_rms_index]}") +average_rms = 
np.mean(clip_rms) * (3/2) + +above_avg_rms = clip_rms +for index, value in enumerate(clip_rms): + if average_rms > clip_rms[index]: + above_avg_rms[index] = 0 + else: + above_avg_rms[index] = 1 + +sum = np.sum(above_avg_rms) +print(f"num frames with above the 1.5x average rms value: {sum}") + +yes_counter = 0 +start_index = None +last_right_index = 0 + +for index, value in enumerate(above_avg_rms): + print(f"current index in above avg rms = {index}") + if value == 1: + print(f"value is 1!") + if yes_counter == 0: + start_index = index + print(f"newest start_index: {start_index}") + yes_counter +=1 + print(f"yes counter : {yes_counter}") + else: + if yes_counter > 0: + print(f"yes counter reached a 0 at index : {index}") + mid_index = int((index - start_index) / 2) + mid_index = mid_index + start_index + real_index = mid_index * HOP_LENGTH + int(FRAME_LENGTH/2) + half_slice_width = int(NUM_SECONDS_OF_SLICE * sr / 2) + left_index = max(0, real_index - half_slice_width) + print(f"left index to start clip: {left_index}") + if left_index > last_right_index: + right_index = real_index + half_slice_width +# current left index needs to be greater than the last right index to prevent overlap + last_right_index = right_index + + print(f"right index to start clip: {right_index}") + sound_slice = sound[left_index:right_index] + + sf.write(f"/home/katiegarwood/test_panda/clip{index}.wav", sound_slice, sr) + yes_counter = 0 + print("created clip, setting yes_counter back to 0") + else: + print("skipping this clip because it would overlap with the last one") + +if yes_counter > 0: + stop_index = index + mid_index = int((stop_index - start_index) / 2) + real_index = mid_index * HOP_LENGTH + int(FRAME_LENGTH/2) + half_slice_width = int(NUM_SECONDS_OF_SLICE * sr / 2) + left_index = max(0, real_index - half_slice_width) + if left_index > last_right_index: + right_index = real_index + half_slice_width + + print(f"right index to start clip: {right_index}") + sound_slice = 
sound[left_index:right_index] + + sf.write(f"/home/katiegarwood/test_panda/clip{index}.wav", sound_slice, sr) + print("created clip, setting yes_counter back to 0") + sf.write("/home/katiegarwood/test_panda/clip.wav", sound_slice, sr) + else: + print("skipping this clip because it qould overlap with the last one") + From 2f3bb93551e7dd1a80d44eb400d2bbb7a46d0c15 Mon Sep 17 00:00:00 2001 From: Katie Garwood Date: Fri, 6 Jun 2025 15:31:34 -0700 Subject: [PATCH 02/13] display rms and mel spectrogram together displays 2 charts with the same timestep, the top one is the rms for each of your specified frame lengths and the bottom is a mel spectrogram. you can see the correlation between the two and it can be helpful --- tools/display_rms_and_mel.py | 37 ++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 tools/display_rms_and_mel.py diff --git a/tools/display_rms_and_mel.py b/tools/display_rms_and_mel.py new file mode 100644 index 0000000..a825aee --- /dev/null +++ b/tools/display_rms_and_mel.py @@ -0,0 +1,37 @@ +import librosa +import librosa.display +import matplotlib.pyplot as plt +import numpy as np +import soundfile as sf + +FILENAME = '/mnt/projects/PandaBear/acoustics/zoo_recordings/2025-05-23/SMM1/GPBZ_SMM01_20250523_103000.wav' # change to path of your sound file +FRAME_LENGTH = 2048 +HOP_LENGTH = 512 +NUM_SECONDS_OF_SLICE = 3 + +sound, sr = librosa.load(FILENAME, sr=None) + +clip_rms = librosa.feature.rms(y=sound, + frame_length=FRAME_LENGTH, + hop_length=HOP_LENGTH) + +clip_rms = clip_rms.squeeze() +peak_rms_index = clip_rms.argmax() +print(f"Peak RMS index: {peak_rms_index}") +peak_index = peak_rms_index * HOP_LENGTH + int(FRAME_LENGTH/2) +print(f"Peak index: {peak_index}") + +S, phase = librosa.magphase(librosa.stft(sound)) +rms = librosa.feature.rms(S=S) +fig, ax = plt.subplots(nrows=2, sharex=True) +times = librosa.times_like(rms) +ax[0].semilogy(times, rms[0], label='RMS Energy') +ax[0].set(xticks=[]) +ax[0].legend() 
+ax[0].label_outer() +librosa.display.specshow(librosa.amplitude_to_db(S, ref=np.max), + y_axis='log', x_axis='time', ax=ax[1]) +ax[1].set(title='log Power spectrogram') + + +plt.show() From b1d43abc98b35a7f842cf4346b1edf8b112fc245 Mon Sep 17 00:00:00 2001 From: Katie Garwood Date: Wed, 20 Aug 2025 16:42:16 -0700 Subject: [PATCH 03/13] add tools to run multiple files beginnings of way to run multiple files through this process. needs to create different filenames to differentiate the original wavs still but then it should be gravy --- tools/extract_noise.py | 137 +++++++++++++++++++------------------ tools/extract_noise.yaml | 3 + tools/run_extract_clips.py | 22 ++++++ 3 files changed, 94 insertions(+), 68 deletions(-) create mode 100644 tools/extract_noise.yaml create mode 100644 tools/run_extract_clips.py diff --git a/tools/extract_noise.py b/tools/extract_noise.py index 0d632cf..c0b51e5 100644 --- a/tools/extract_noise.py +++ b/tools/extract_noise.py @@ -4,85 +4,86 @@ import numpy as np import soundfile as sf -FILENAME = '/mnt/projects/PandaBear/acoustics/zoo_recordings/2025-05-23/SMM1/GPBZ_SMM01_20250523_103000.wav' # change to path of your sound file -FRAME_LENGTH = 4096 -HOP_LENGTH = 2048 -NUM_SECONDS_OF_SLICE = 3 +def main(file): + FILENAME = file # change to path of your sound file + FRAME_LENGTH = 4096 + HOP_LENGTH = 2048 + NUM_SECONDS_OF_SLICE = 3 -sound, sr = librosa.load(FILENAME, sr=None) -print(f"sample rate: {sr}") + sound, sr = librosa.load(FILENAME, sr=None) + print(f"sample rate: {sr}") -clip_rms = librosa.feature.rms(y=sound, - frame_length=FRAME_LENGTH, - hop_length=HOP_LENGTH) + clip_rms = librosa.feature.rms(y=sound, + frame_length=FRAME_LENGTH, + hop_length=HOP_LENGTH) -clip_rms = clip_rms.squeeze() -print(f"clip RMS: {clip_rms}, length of clip rms: {clip_rms.size}") -peak_rms_index = clip_rms.argmax() -print(f"Peak RMS index: {peak_rms_index}, value: {clip_rms[peak_rms_index]}") -average_rms = np.mean(clip_rms) * (3/2) + clip_rms = 
clip_rms.squeeze() + print(f"clip RMS: {clip_rms}, length of clip rms: {clip_rms.size}") + peak_rms_index = clip_rms.argmax() + print(f"Peak RMS index: {peak_rms_index}, value: {clip_rms[peak_rms_index]}") + average_rms = np.mean(clip_rms) * (3/2) -above_avg_rms = clip_rms -for index, value in enumerate(clip_rms): - if average_rms > clip_rms[index]: - above_avg_rms[index] = 0 - else: - above_avg_rms[index] = 1 + above_avg_rms = clip_rms + for index, value in enumerate(clip_rms): + if average_rms > clip_rms[index]: + above_avg_rms[index] = 0 + else: + above_avg_rms[index] = 1 -sum = np.sum(above_avg_rms) -print(f"num frames with above the 1.5x average rms value: {sum}") + sum = np.sum(above_avg_rms) + print(f"num frames with above the 1.5x average rms value: {sum}") -yes_counter = 0 -start_index = None -last_right_index = 0 + yes_counter = 0 + start_index = None + last_right_index = 0 -for index, value in enumerate(above_avg_rms): - print(f"current index in above avg rms = {index}") - if value == 1: - print(f"value is 1!") - if yes_counter == 0: - start_index = index - print(f"newest start_index: {start_index}") - yes_counter +=1 - print(f"yes counter : {yes_counter}") - else: - if yes_counter > 0: - print(f"yes counter reached a 0 at index : {index}") - mid_index = int((index - start_index) / 2) - mid_index = mid_index + start_index - real_index = mid_index * HOP_LENGTH + int(FRAME_LENGTH/2) - half_slice_width = int(NUM_SECONDS_OF_SLICE * sr / 2) - left_index = max(0, real_index - half_slice_width) - print(f"left index to start clip: {left_index}") - if left_index > last_right_index: - right_index = real_index + half_slice_width + for index, value in enumerate(above_avg_rms): + print(f"current index in above avg rms = {index}") + if value == 1: + print(f"value is 1!") + if yes_counter == 0: + start_index = index + print(f"newest start_index: {start_index}") + yes_counter +=1 + print(f"yes counter : {yes_counter}") + else: + if yes_counter > 0: + print(f"yes counter 
reached a 0 at index : {index}") + mid_index = int((index - start_index) / 2) + mid_index = mid_index + start_index + real_index = mid_index * HOP_LENGTH + int(FRAME_LENGTH/2) + half_slice_width = int(NUM_SECONDS_OF_SLICE * sr / 2) + left_index = max(0, real_index - half_slice_width) + print(f"left index to start clip: {left_index}") + if left_index > last_right_index: + right_index = real_index + half_slice_width # current left index needs to be greater than the last right index to prevent overlap - last_right_index = right_index + last_right_index = right_index - print(f"right index to start clip: {right_index}") - sound_slice = sound[left_index:right_index] + print(f"right index to start clip: {right_index}") + sound_slice = sound[left_index:right_index] - sf.write(f"/home/katiegarwood/test_panda/clip{index}.wav", sound_slice, sr) - yes_counter = 0 - print("created clip, setting yes_counter back to 0") - else: - print("skipping this clip because it would overlap with the last one") + sf.write(f"/home/katiegarwood/test_panda/clip{index}.wav", sound_slice, sr) + yes_counter = 0 + print("created clip, setting yes_counter back to 0") + else: + print("skipping this clip because it would overlap with the last one") -if yes_counter > 0: - stop_index = index - mid_index = int((stop_index - start_index) / 2) - real_index = mid_index * HOP_LENGTH + int(FRAME_LENGTH/2) - half_slice_width = int(NUM_SECONDS_OF_SLICE * sr / 2) - left_index = max(0, real_index - half_slice_width) - if left_index > last_right_index: - right_index = real_index + half_slice_width + if yes_counter > 0: + stop_index = index + mid_index = int((stop_index - start_index) / 2) + real_index = mid_index * HOP_LENGTH + int(FRAME_LENGTH/2) + half_slice_width = int(NUM_SECONDS_OF_SLICE * sr / 2) + left_index = max(0, real_index - half_slice_width) + if left_index > last_right_index: + right_index = real_index + half_slice_width - print(f"right index to start clip: {right_index}") - sound_slice = 
sound[left_index:right_index] + print(f"right index to start clip: {right_index}") + sound_slice = sound[left_index:right_index] - sf.write(f"/home/katiegarwood/test_panda/clip{index}.wav", sound_slice, sr) - print("created clip, setting yes_counter back to 0") - sf.write("/home/katiegarwood/test_panda/clip.wav", sound_slice, sr) - else: - print("skipping this clip because it qould overlap with the last one") + sf.write(f"/home/katiegarwood/test_panda/clip{index}.wav", sound_slice, sr) + print("created clip, setting yes_counter back to 0") + sf.write("/home/katiegarwood/test_panda/clip.wav", sound_slice, sr) + else: + print("skipping this clip because it qould overlap with the last one") diff --git a/tools/extract_noise.yaml b/tools/extract_noise.yaml new file mode 100644 index 0000000..c86afbb --- /dev/null +++ b/tools/extract_noise.yaml @@ -0,0 +1,3 @@ +audio: /mnt/projects/PandaBear/acoustics/zoo_recordings/2025-05-23/SMM1/ +out: /home/katiegarwood/2025-05-23_SMM1_rms_clips/ + diff --git a/tools/run_extract_clips.py b/tools/run_extract_clips.py new file mode 100644 index 0000000..6e3daaa --- /dev/null +++ b/tools/run_extract_clips.py @@ -0,0 +1,22 @@ +from extract_noise import main +import argparse +import os +import yaml + + +if __name__ == "__main__": + PARSER = argparse.ArgumentParser( + description='Path to config file.' 
+ ) + PARSER.add_argument('-config', type=str, + help='Path to config.') + ARGS = PARSER.parse_args() + with open(ARGS.config, 'r', encoding='UTF-8') as f: + config = yaml.safe_load(f) + all_files = os.listdir(config['audio']) + for file in all_files: + try: + print(f"running {file}") + main(os.path.join(config['audio'], file)) + except: + print("couldnt load {file}") From 325201ff923772fac003c94f1cc4fcd7593d90a1 Mon Sep 17 00:00:00 2001 From: Katie Garwood Date: Mon, 25 Aug 2025 11:59:06 -0700 Subject: [PATCH 04/13] add outfile and fix filenames filenames were assuming 1 wav file before, now it writes the name of the clip with the original wav file in the filename so it's clearer, it also successfully utilizes the proper specified outpath to save the files --- tools/extract_noise.py | 21 +++++++++++++-------- tools/run_extract_clips.py | 7 ++++--- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/tools/extract_noise.py b/tools/extract_noise.py index c0b51e5..75435fd 100644 --- a/tools/extract_noise.py +++ b/tools/extract_noise.py @@ -3,8 +3,10 @@ import matplotlib.pyplot as plt import numpy as np import soundfile as sf +import os -def main(file): + +def main(file, out): FILENAME = file # change to path of your sound file FRAME_LENGTH = 4096 HOP_LENGTH = 2048 @@ -59,13 +61,14 @@ def main(file): right_index = real_index + half_slice_width # current left index needs to be greater than the last right index to prevent overlap last_right_index = right_index - + filename = os.path.basename(file) + filename = filename.strip('.wav') print(f"right index to start clip: {right_index}") sound_slice = sound[left_index:right_index] - - sf.write(f"/home/katiegarwood/test_panda/clip{index}.wav", sound_slice, sr) + name = out + filename + "_" + str(index) + ".wav" + sf.write(name, sound_slice, sr) yes_counter = 0 - print("created clip, setting yes_counter back to 0") + print(f"created {name}, setting yes_counter back to 0") else: print("skipping this clip because it 
would overlap with the last one") @@ -80,10 +83,12 @@ def main(file): print(f"right index to start clip: {right_index}") sound_slice = sound[left_index:right_index] + filename = os.path.basename(file) + filename = filename.strip('.wav') + name = out + filename + "_" + str(index) + ".wav" - sf.write(f"/home/katiegarwood/test_panda/clip{index}.wav", sound_slice, sr) - print("created clip, setting yes_counter back to 0") - sf.write("/home/katiegarwood/test_panda/clip.wav", sound_slice, sr) + sf.write(NAME, sound_slice, sr) + print("end of clip") else: print("skipping this clip because it qould overlap with the last one") diff --git a/tools/run_extract_clips.py b/tools/run_extract_clips.py index 6e3daaa..4bb9589 100644 --- a/tools/run_extract_clips.py +++ b/tools/run_extract_clips.py @@ -14,9 +14,10 @@ with open(ARGS.config, 'r', encoding='UTF-8') as f: config = yaml.safe_load(f) all_files = os.listdir(config['audio']) + out = config['out'] for file in all_files: try: print(f"running {file}") - main(os.path.join(config['audio'], file)) - except: - print("couldnt load {file}") + main(os.path.join(config['audio'], file), out) + except Exception as e: + print(f"couldnt load {file} because {e}") From 7c765fd85a8d12ac2ebce088acf316f97f9c802a Mon Sep 17 00:00:00 2001 From: Katie Garwood Date: Mon, 25 Aug 2025 12:07:34 -0700 Subject: [PATCH 05/13] added docstring fix pylint there is one error of catching too broad an except but im going to ignore it for right now because it helps with debugging --- tools/run_extract_clips.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tools/run_extract_clips.py b/tools/run_extract_clips.py index 4bb9589..743145e 100644 --- a/tools/run_extract_clips.py +++ b/tools/run_extract_clips.py @@ -1,7 +1,24 @@ -from extract_noise import main +"""Create segments of noisy audio from wavs. 
+ +This script uses the extract noise function to +calculate the average RMS of a given wav file, +and then creates 3 second segments where the +RMS peaked above the average. This main script +parses through a directory and sends each wav +file through the function. The extract_noise.yaml +is an example of the config file needed, copy +it and fill it out prior to running script. + +Usage: + + python3 run_extract_noise.py + -config /path/to/extract_noise_copy.yaml + +""" import argparse import os import yaml +from extract_noise import main if __name__ == "__main__": From e7469ffec0f7c8b16a8c57bac0ae7d57bcce069a Mon Sep 17 00:00:00 2001 From: Katie Garwood Date: Mon, 25 Aug 2025 13:38:06 -0700 Subject: [PATCH 06/13] add yaml file example --- tools/extract_noise.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/extract_noise.yaml b/tools/extract_noise.yaml index c86afbb..20fe13c 100644 --- a/tools/extract_noise.yaml +++ b/tools/extract_noise.yaml @@ -1,3 +1,3 @@ -audio: /mnt/projects/PandaBear/acoustics/zoo_recordings/2025-05-23/SMM1/ -out: /home/katiegarwood/2025-05-23_SMM1_rms_clips/ +audio: /path/to/audio/file/dir/ +out: /path/to/output/folder/ From 7130c22f5882ff12f72254d51165480b8d372cee Mon Sep 17 00:00:00 2001 From: Katie Garwood Date: Mon, 25 Aug 2025 13:39:43 -0700 Subject: [PATCH 07/13] fixed linting and a logic error the last catch for making the final segment in a wav file was creating a new right_index to stop the recording, when it should have just been using the stop_index so that it wouldn't go out of bounds. also made a sub function to make the rms array to clean up the code a bit --- tools/extract_noise.py | 110 +++++++++++++++++++++++------------------ 1 file changed, 62 insertions(+), 48 deletions(-) diff --git a/tools/extract_noise.py b/tools/extract_noise.py index 75435fd..e52f9c4 100644 --- a/tools/extract_noise.py +++ b/tools/extract_noise.py @@ -1,94 +1,108 @@ +"""Extract noisy segments from a wav file. 
+ +Takes in a wav file and an outpath to store +the 3 second segments that contain an RMS value above +the average RMS for that wav file. +""" +import os import librosa import librosa.display -import matplotlib.pyplot as plt import numpy as np import soundfile as sf -import os def main(file, out): - FILENAME = file # change to path of your sound file - FRAME_LENGTH = 4096 - HOP_LENGTH = 2048 - NUM_SECONDS_OF_SLICE = 3 + """Extract loud segments from a wav file. - sound, sr = librosa.load(FILENAME, sr=None) + Args: + file (str): The path of the current wav file. + out (str): The path to the directory to store the + loud segments. + """ + index = None + filename = file + frame_length = 4096 + hop_length = 2048 + num_sec_slice = 3 + sound, sr = librosa.load(filename, sr=None) print(f"sample rate: {sr}") - clip_rms = librosa.feature.rms(y=sound, - frame_length=FRAME_LENGTH, - hop_length=HOP_LENGTH) - - clip_rms = clip_rms.squeeze() - print(f"clip RMS: {clip_rms}, length of clip rms: {clip_rms.size}") - peak_rms_index = clip_rms.argmax() - print(f"Peak RMS index: {peak_rms_index}, value: {clip_rms[peak_rms_index]}") - average_rms = np.mean(clip_rms) * (3/2) - - above_avg_rms = clip_rms - for index, value in enumerate(clip_rms): - if average_rms > clip_rms[index]: - above_avg_rms[index] = 0 - else: - above_avg_rms[index] = 1 - - sum = np.sum(above_avg_rms) - print(f"num frames with above the 1.5x average rms value: {sum}") + above_avg_rms = find_peaks(frame_length, hop_length, sound) yes_counter = 0 start_index = None last_right_index = 0 for index, value in enumerate(above_avg_rms): - print(f"current index in above avg rms = {index}") if value == 1: - print(f"value is 1!") if yes_counter == 0: start_index = index - print(f"newest start_index: {start_index}") - yes_counter +=1 - print(f"yes counter : {yes_counter}") + yes_counter += 1 else: if yes_counter > 0: - print(f"yes counter reached a 0 at index : {index}") mid_index = int((index - start_index) / 2) mid_index = 
mid_index + start_index - real_index = mid_index * HOP_LENGTH + int(FRAME_LENGTH/2) - half_slice_width = int(NUM_SECONDS_OF_SLICE * sr / 2) + real_index = mid_index * hop_length + int(frame_length/2) + half_slice_width = int(num_sec_slice * sr / 2) left_index = max(0, real_index - half_slice_width) - print(f"left index to start clip: {left_index}") if left_index > last_right_index: right_index = real_index + half_slice_width -# current left index needs to be greater than the last right index to prevent overlap - last_right_index = right_index + # left index needs to be greater than the last right + last_right_index = right_index + 1 filename = os.path.basename(file) filename = filename.strip('.wav') - print(f"right index to start clip: {right_index}") sound_slice = sound[left_index:right_index] name = out + filename + "_" + str(index) + ".wav" sf.write(name, sound_slice, sr) yes_counter = 0 print(f"created {name}, setting yes_counter back to 0") else: - print("skipping this clip because it would overlap with the last one") + print("skipping clip bc it would overlap with last clip") if yes_counter > 0: stop_index = index mid_index = int((stop_index - start_index) / 2) - real_index = mid_index * HOP_LENGTH + int(FRAME_LENGTH/2) - half_slice_width = int(NUM_SECONDS_OF_SLICE * sr / 2) + real_index = mid_index * hop_length + int(frame_length/2) + half_slice_width = int(num_sec_slice * sr / 2) left_index = max(0, real_index - half_slice_width) if left_index > last_right_index: - right_index = real_index + half_slice_width - - print(f"right index to start clip: {right_index}") - sound_slice = sound[left_index:right_index] + sound_slice = sound[left_index:stop_index] filename = os.path.basename(file) filename = filename.strip('.wav') name = out + filename + "_" + str(index) + ".wav" + sf.write(name, sound_slice, sr) + else: + print("skipping clip bc it would overlap with last clip") + + +def find_peaks(frame_length, hop_length, sound): + """Find peak RMS moments in a sound 
file. + + Args: + frame_length (int): Window size. + hop_length (int): Overlap between frames. + sound (numpy.ndarray): The audio as a time series array. + + Returns: + numpy.ndarray: The array containing each frame as an index + with values corresponding to whether that + frame exceeded the avg RMS or not. + """ + clip_rms = librosa.feature.rms(y=sound, + frame_length=frame_length, + hop_length=hop_length) + + clip_rms = clip_rms.squeeze() + average_rms = np.mean(clip_rms) * (3/2) + above_avg_rms = clip_rms - sf.write(NAME, sound_slice, sr) - print("end of clip") + for index, _ in enumerate(clip_rms): + if average_rms > clip_rms[index]: + above_avg_rms[index] = 0 else: - print("skipping this clip because it qould overlap with the last one") + above_avg_rms[index] = 1 + + num_frames = np.sum(above_avg_rms) + print(f"num frames with above the 1.5x average rms value: {num_frames}") + return above_avg_rms From c728674eef707e22285cbbd337625a3dac758a3d Mon Sep 17 00:00:00 2001 From: Katie Garwood Date: Mon, 25 Aug 2025 14:08:42 -0700 Subject: [PATCH 08/13] change the function name from main i want to ultimately put this in the whoot package so it shouldn't necessarily be called main --- tools/extract_noise.py | 2 +- tools/run_extract_clips.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/extract_noise.py b/tools/extract_noise.py index e52f9c4..0454eb0 100644 --- a/tools/extract_noise.py +++ b/tools/extract_noise.py @@ -11,7 +11,7 @@ import soundfile as sf -def main(file, out): +def clip_loud_segments(file, out): """Extract loud segments from a wav file. 
Args: diff --git a/tools/run_extract_clips.py b/tools/run_extract_clips.py index 743145e..af9e371 100644 --- a/tools/run_extract_clips.py +++ b/tools/run_extract_clips.py @@ -18,7 +18,7 @@ import argparse import os import yaml -from extract_noise import main +from extract_noise import clip_loud_segments if __name__ == "__main__": From b526ad561e5eb91320537c51ae4ad7da45a2581a Mon Sep 17 00:00:00 2001 From: Katie Garwood Date: Mon, 25 Aug 2025 14:09:36 -0700 Subject: [PATCH 09/13] fix function name --- tools/run_extract_clips.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/run_extract_clips.py b/tools/run_extract_clips.py index af9e371..fdad232 100644 --- a/tools/run_extract_clips.py +++ b/tools/run_extract_clips.py @@ -35,6 +35,6 @@ for file in all_files: try: print(f"running {file}") - main(os.path.join(config['audio'], file), out) + clip_loud_segments(os.path.join(config['audio'], file), out) except Exception as e: print(f"couldnt load {file} because {e}") From 5c836dbed090a2118f678af223b8ddb63c7de645 Mon Sep 17 00:00:00 2001 From: Katie Garwood Date: Mon, 25 Aug 2025 14:27:03 -0700 Subject: [PATCH 10/13] reorganize with updated dev to make it be in the whoot package --- .../extract_noise_example.yaml | 0 tools/README.md | 14 ++++++++++++++ .../{run_extract_clips.py => run_extract_noise.py} | 2 +- whoot/__init__.py | 3 +++ {tools => whoot}/extract_noise.py | 0 5 files changed, 18 insertions(+), 1 deletion(-) rename tools/extract_noise.yaml => cfgs/extract_noise_example.yaml (100%) create mode 100644 tools/README.md rename tools/{run_extract_clips.py => run_extract_noise.py} (95%) rename {tools => whoot}/extract_noise.py (100%) diff --git a/tools/extract_noise.yaml b/cfgs/extract_noise_example.yaml similarity index 100% rename from tools/extract_noise.yaml rename to cfgs/extract_noise_example.yaml diff --git a/tools/README.md b/tools/README.md new file mode 100644 index 0000000..b34c6af --- /dev/null +++ b/tools/README.md @@ -0,0 +1,14 @@ 
+Tools for handling unlabeled raw audio. + +To investigate and understand your raw audio data better, +and to be able to isolate potentially significant acoustic +events to reduce labeling time. + +run_extract_noise.py will generate 3s clips from larger wav files +where the RMS of that segment exceeded 1.5x the average RMS of the +entire file. This can highlight loud events in an audio file. + +display_rms_and_mel.py will give a visual graph with the mel +spectrogram and RMS chart for a given wav for a sanity check +and to get a better idea of what the spectrogram looks like for +a given RMS peak. diff --git a/tools/run_extract_clips.py b/tools/run_extract_noise.py similarity index 95% rename from tools/run_extract_clips.py rename to tools/run_extract_noise.py index fdad232..e8d55f7 100644 --- a/tools/run_extract_clips.py +++ b/tools/run_extract_noise.py @@ -18,7 +18,7 @@ import argparse import os import yaml -from extract_noise import clip_loud_segments +from whoot.extract_noise import clip_loud_segments if __name__ == "__main__": diff --git a/whoot/__init__.py b/whoot/__init__.py index c0ae2e0..4fb2222 100644 --- a/whoot/__init__.py +++ b/whoot/__init__.py @@ -1 +1,4 @@ __version__ = "0.0.2.dev0" + + +from .extract_noise import clip_loud_segments diff --git a/tools/extract_noise.py b/whoot/extract_noise.py similarity index 100% rename from tools/extract_noise.py rename to whoot/extract_noise.py From f243172e2634ab8e5c08b7e1129ef024d35de4c3 Mon Sep 17 00:00:00 2001 From: Katie Garwood Date: Mon, 25 Aug 2025 14:37:42 -0700 Subject: [PATCH 11/13] add some hard coded variables to config adding the frame_length, the hop_length, and desired clip size to the config file to easily adjust the values --- cfgs/extract_noise_example.yaml | 4 +++- tools/run_extract_noise.py | 2 +- whoot/extract_noise.py | 14 +++++++------- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/cfgs/extract_noise_example.yaml b/cfgs/extract_noise_example.yaml index 20fe13c..018e9de 100644 
--- a/cfgs/extract_noise_example.yaml +++ b/cfgs/extract_noise_example.yaml @@ -1,3 +1,5 @@ audio: /path/to/audio/file/dir/ out: /path/to/output/folder/ - +frame_length: 4096 +hop_length: 2048 +num_sec_slice: 3 diff --git a/tools/run_extract_noise.py b/tools/run_extract_noise.py index e8d55f7..f6ec369 100644 --- a/tools/run_extract_noise.py +++ b/tools/run_extract_noise.py @@ -35,6 +35,6 @@ for file in all_files: try: print(f"running {file}") - clip_loud_segments(os.path.join(config['audio'], file), out) + clip_loud_segments(os.path.join(config['audio'], file), config) except Exception as e: print(f"couldnt load {file} because {e}") diff --git a/whoot/extract_noise.py b/whoot/extract_noise.py index 0454eb0..a7bf47a 100644 --- a/whoot/extract_noise.py +++ b/whoot/extract_noise.py @@ -11,19 +11,19 @@ import soundfile as sf -def clip_loud_segments(file, out): +def clip_loud_segments(file, config): """Extract loud segments from a wav file. Args: file (str): The path of the current wav file. - out (str): The path to the directory to store the + config (dict): Run settings; 'out' is the directory to store the loud segments. 
""" index = None filename = file - frame_length = 4096 - hop_length = 2048 - num_sec_slice = 3 + frame_length = config['frame_length'] + hop_length = config['hop_length'] + num_sec_slice = config['num_sec_slice'] sound, sr = librosa.load(filename, sr=None) print(f"sample rate: {sr}") @@ -52,7 +52,7 @@ def clip_loud_segments(file, out): filename = os.path.basename(file) filename = filename.strip('.wav') sound_slice = sound[left_index:right_index] - name = out + filename + "_" + str(index) + ".wav" + name = config['out'] + filename + "_" + str(index) + ".wav" sf.write(name, sound_slice, sr) yes_counter = 0 print(f"created {name}, setting yes_counter back to 0") @@ -69,7 +69,7 @@ def clip_loud_segments(file, out): sound_slice = sound[left_index:stop_index] filename = os.path.basename(file) filename = filename.strip('.wav') - name = out + filename + "_" + str(index) + ".wav" + name = config['out'] + filename + "_" + str(index) + ".wav" sf.write(name, sound_slice, sr) else: print("skipping clip bc it would overlap with last clip") From b2881aa4c01aba3509d1784c115af06c48b5e4c2 Mon Sep 17 00:00:00 2001 From: Katie Garwood Date: Mon, 25 Aug 2025 14:38:47 -0700 Subject: [PATCH 12/13] lint --- tools/display_rms_and_mel.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tools/display_rms_and_mel.py b/tools/display_rms_and_mel.py index a825aee..4d3a26b 100644 --- a/tools/display_rms_and_mel.py +++ b/tools/display_rms_and_mel.py @@ -1,10 +1,20 @@ +"""Display RMS and Mel-Spectrogram + +For a given audio file, you can visualize the RMS and +the associated Mel-Spectrogram with the same time-step to +see how they relate. Replace the filename variable with the +path to your specific audio file. 
+ +Usage: + python3 display_rms_and_mel.py +""" import librosa import librosa.display import matplotlib.pyplot as plt import numpy as np -import soundfile as sf -FILENAME = '/mnt/projects/PandaBear/acoustics/zoo_recordings/2025-05-23/SMM1/GPBZ_SMM01_20250523_103000.wav' # change to path of your sound file + +FILENAME = '' FRAME_LENGTH = 2048 HOP_LENGTH = 512 NUM_SECONDS_OF_SLICE = 3 From 6a01724b8e6050ab96ab55b1117d86f1645fdea8 Mon Sep 17 00:00:00 2001 From: Katie Garwood Date: Mon, 25 Aug 2025 14:42:21 -0700 Subject: [PATCH 13/13] remove unneeded line --- tools/run_extract_noise.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/run_extract_noise.py b/tools/run_extract_noise.py index f6ec369..3c03240 100644 --- a/tools/run_extract_noise.py +++ b/tools/run_extract_noise.py @@ -31,7 +31,6 @@ with open(ARGS.config, 'r', encoding='UTF-8') as f: config = yaml.safe_load(f) all_files = os.listdir(config['audio']) - out = config['out'] for file in all_files: try: print(f"running {file}")