diff --git a/assess_birdnet/README.md b/assess_birdnet/README.md
new file mode 100644
index 0000000..73b382f
--- /dev/null
+++ b/assess_birdnet/README.md
@@ -0,0 +1,36 @@
+Tools to assess BirdNET performance on Buowset
+
+Segments must be at least 3s long to be assessed by BirdNET: buowsetv1.0
+needs padding, while buowsetv1.1 segments are already 3s without artificial
+padding. Once the segments are ready, run BirdNET analyze over the entire
+dataset with the desired confidence threshold and with burrowing owl as the
+only class in the species list.
+
+To run BirdNET over your dataset, follow the instructions here:
+https://birdnet-team.github.io/BirdNET-Analyzer/usage/cli.html#birdnet-analyzer-analyze
+
+We created our own species list with only our species of interest and ran
+birdnet_analyzer.analyze over the entire dataset, beginning with the default
+confidence and sensitivity values. You can adjust these values and rerun
+to compare performance across different confidence thresholds and
+sensitivities. BirdNET produces one text file of results per audio file in
+your dataset; we saved these text files to the same directory as the audio.
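+A minimal sketch of this step (the exact flag names and the species list
+format are assumptions here; check the linked CLI docs for your
+BirdNET-Analyzer version):
+
+```sh
+# Species list with burrowing owl as the only class; BirdNET species
+# lists use one "Scientific name_Common name" entry per line.
+echo "Athene cunicularia_Burrowing Owl" > species_list.txt
+
+# Analyze every wav in audio/, writing one result table per file;
+# adjust --min_conf and --sensitivity and rerun to compare thresholds.
+python3 -m birdnet_analyzer.analyze audio/ --slist species_list.txt \
+    --min_conf 0.25 --sensitivity 1.0 --rtype table
+```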
+Running aggregate_birdnet_buowset.py with the path to the BirdNET results and
+a .pkl file to write the result to creates a dataframe with the name of each
+wav file and a 0 for no buow or a 1 for buow detected by BirdNET.
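+For example, with placeholder paths:
+
+```sh
+python3 aggregate_birdnet_buowset.py /path/to/birdnet/results/ birdnet_results.pkl
+```
+
+The saved dataframe has one row per segment: a 'segment' index of wav
+filenames and a single 'bn_label' column of 0/1 values.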
+Then, running buowset_assess_birdnet.py with the aforementioned .pkl, the
+metadata file for buowset, and some optional parameters, you can compare the
+performance of BirdNET against the ground truth labels of buowset. By adding
+the optional arguments, you go from evaluating BirdNET as a burrowing owl/
+no burrowing owl detector to assessing its performance on a class-by-class
+basis. If you choose to assess the 'Coocoo' class, for example, the script
+gathers all coocoo instances based on ground truth, draws an equal number of
+randomly selected no_buow samples, and generates a confusion matrix showing
+whether BirdNET marked the instances of that class as burrowing owl.
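+For example, with placeholder paths (class numbers follow the buowset
+metadata; coocoo is class 1 in our labeling scheme):
+
+```sh
+# Binary buow/no_buow assessment
+python3 buowset_assess_birdnet.py birdnet_results.pkl metadata.csv
+
+# Per-class assessment, e.g. the coocoo class
+python3 buowset_assess_birdnet.py birdnet_results.pkl metadata.csv \
+    --single_class --assess_class 1
+```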
+Because BirdNET acts as a binary burrowing owl classifier on this data, a
+class-by-class comparison only tells us whether it disproportionately misses
+certain calls more than others when looking for burrowing owls in general,
+i.e. it gives us a peek into the likely call distribution of its training
+data.
diff --git a/assess_birdnet/aggregate_birdnet_buowset.py b/assess_birdnet/aggregate_birdnet_buowset.py
new file mode 100644
index 0000000..4b66105
--- /dev/null
+++ b/assess_birdnet/aggregate_birdnet_buowset.py
@@ -0,0 +1,75 @@
+"""Create a dataframe out of BirdNET results.
+
+When you run BirdNET analyze on wav files, it outputs one
+result txt file per wav. We aggregate all of the results
+into one dataframe and save it out so we can reference it
+later when we analyze the BirdNET results for buowset.
+
+Usage:
+    python3 aggregate_birdnet_buowset.py /path/to/birdnet/results/
+    /path/to/output.pkl
+"""
+import argparse
+import glob
+import os
+
+import pandas as pd
+
+
+def parse_birdnet_analysis(birdnet):
+    """Create a dataframe from individual BirdNET result files.
+
+    Args:
+        birdnet (str): Path to the BirdNET results.
+
+    Returns:
+        pandas.DataFrame: BirdNET results as a single dataframe.
+    """
+    bn_dict = {}
+    burowl_count = 0
+    result_files = glob.glob(os.path.join(birdnet, "*.txt"))
+    for txt_file in result_files:
+        # Map each result table back to the wav it was generated from.
+        filename = os.path.basename(txt_file)
+        filename = filename.replace("BirdNET.selection.table.txt", "wav")
+        with open(txt_file, 'r') as file:
+            header = file.readline().strip().split('\t')
+            data = pd.read_csv(file, header=None, names=header, delimiter='\t')
+        # Mark the segment 1 if any row in the table is a burrowing owl
+        # detection, 0 otherwise.
+        if any(data['Species Code'].str.lower() == 'burowl'):
+            bn_dict[filename] = 1
+            burowl_count += 1
+            print(f"New burowl count is {burowl_count}")
+        else:
+            bn_dict[filename] = 0
+    print("finished dict")
+    birdnet_df = pd.DataFrame.from_dict(bn_dict,
+                                        orient='index',
+                                        columns=['bn_label'])
+    birdnet_df.index.name = 'segment'
+    return birdnet_df
+
+
+def main(birdnet, output):
+    """Save out BirdNET results to a dataframe.
+
+    Args:
+        birdnet (str): Path to the BirdNET results files.
+        output (str): Filename for the output pkl.
+    """
+    birdnet_df = parse_birdnet_analysis(birdnet)
+    birdnet_df.to_pickle(output)
+    print(birdnet_df)
+
+
+if __name__ == '__main__':
+    PARSER = argparse.ArgumentParser(
+        description='Aggregate per-file BirdNET results into one dataframe.'
+    )
+    PARSER.add_argument('birdnet_analysis',
+                        type=str,
+                        help='Path to BirdNET analysis folder.')
+    PARSER.add_argument('output',
+                        type=str,
+                        help='Path to desired output for result.')
+    ARGS = PARSER.parse_args()
+    main(ARGS.birdnet_analysis, ARGS.output)
diff --git a/assess_birdnet/buowset_assess_birdnet.py b/assess_birdnet/buowset_assess_birdnet.py
new file mode 100644
index 0000000..0983ca0
--- /dev/null
+++ b/assess_birdnet/buowset_assess_birdnet.py
@@ -0,0 +1,188 @@
+"""Assess BirdNET performance on Buowset.
+
+This allows for assessing how well BirdNET performs on
+buowset for burrowing owl/no burrowing owl, and for
+the individual call types within our labeled data.
+
+Usage:
+    python3 buowset_assess_birdnet.py /path/to/birdnet/output.pkl
+    /path/to/buowset/metadata.csv
+"""
+import argparse
+import random
+
+from comet_ml import Experiment
+import pandas as pd
+from sklearn.metrics import confusion_matrix, accuracy_score
+from sklearn.metrics import recall_score, f1_score, precision_score
+
+
+def organize_birdnet_output(birdnet_results):
+    """Open the aggregated BirdNET results file.
+
+    Args:
+        birdnet_results (str): Path to the .pkl aggregated results.
+
+    Returns:
+        pd.DataFrame: The aggregated BirdNET results.
+    """
+    birdnet_df = pd.read_pickle(birdnet_results)
+    return birdnet_df
+
+
+def merge_metadata(metadata, birdnet_df):
+    """Combine metadata and BirdNET results by segment.
+
+    Args:
+        metadata (str): Path to the metadata file.
+        birdnet_df (pd.DataFrame): The BirdNET results.
+
+    Returns:
+        pd.DataFrame: Ground truth and BirdNET labels merged on segment.
+    """
+    meta = pd.read_csv(metadata, index_col=0)
+    df_merged = meta.merge(birdnet_df, on='segment')
+    df_merged = df_merged.drop(columns=['segment_duration_s', 'fold'])
+    return df_merged
+
+
+def map_binary_labels(merged_data):
+    """Obtain the true and predicted label arrays for buow/no_buow.
+
+    Classes 0-4 are burrowing owl call types and collapse to 1 (buow);
+    class 5 is no_buow and maps to 0.
+
+    Args:
+        merged_data (pd.DataFrame): The BirdNET label and ground truth
+            merged on segment name.
+
+    Returns:
+        tuple: (y_true, y_pred) arrays of binary labels.
+    """
+    y_true = merged_data['label'].map(
+        {0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 0}
+    ).values
+    y_pred = merged_data['bn_label'].values
+    return y_true, y_pred
+
+
+def map_class_labels(merged_data, assess_class):
+    """Create a binary class vs no_buow assessment for one class.
+
+    Randomly undersamples no_buow to match the number of samples of
+    the assessed class, so the two sides are balanced.
+
+    Args:
+        merged_data (pd.DataFrame): The BirdNET label and ground truth
+            merged on segment name.
+        assess_class (int): The number associated with the specific
+            vocalization type to be assessed.
+
+    Returns:
+        tuple: (y_true, y_pred) arrays of binary labels.
+    """
+    class_only = merged_data[merged_data['label'] == assess_class]
+    no_buow_only = merged_data[merged_data['label'] == 5]
+    # Draw as many random no_buow rows as there are rows of the
+    # assessed class.
+    index_no_buow = random.sample(range(len(no_buow_only)), len(class_only))
+    no_buow_subset = no_buow_only.iloc[index_no_buow]
+    merged = pd.concat([no_buow_subset, class_only], ignore_index=True)
+    y_true = merged['label'].map({assess_class: 1, 5: 0}).values
+    y_pred = merged['bn_label'].values
+    return y_true, y_pred
+
+
+def assess_birdnet(y_true, y_pred, experiment=None):
+    """Assess BirdNET against ground truth labels.
+
+    Args:
+        y_true (numpy.ndarray): Ground truth binary labels.
+        y_pred (numpy.ndarray): BirdNET binary labels.
+        experiment (comet_ml.Experiment): Optional experiment to log
+            the metrics to.
+    """
+    confusion_m = confusion_matrix(y_true, y_pred)
+    accuracy = accuracy_score(y_true, y_pred)
+    precision = precision_score(y_true, y_pred)
+    recall = recall_score(y_true, y_pred)
+    f1_result = f1_score(y_true, y_pred)
+
+    print("Confusion Matrix:")
+    print(confusion_m)
+    print(f"Accuracy: {accuracy:.4f}")
+    print(f"Precision: {precision:.4f}")
+    print(f"Recall: {recall:.4f}")
+    print(f"F1 Score: {f1_result:.4f}")
+
+    if experiment:
+        experiment.log_metric("accuracy", accuracy)
+        experiment.log_metric("precision", precision)
+        experiment.log_metric("recall", recall)
+        experiment.log_metric("f1_score", f1_result)
+        experiment.log_confusion_matrix(matrix=confusion_m.tolist(),
+                                        labels=["No Detection", "Detection"])
+
+
+def create_comet_exp():
+    """Create the comet experiment settings.
+
+    Returns:
+        comet_ml.Experiment: The named and tagged experiment.
+    """
+    project = input("Enter the comet project name: ")
+    work_space = input("Enter the comet workspace: ")
+    experiment_name = input("Enter the name of this experiment: ")
+    experiment = Experiment(
+        project_name=project,
+        workspace=work_space
+    )
+    experiment.set_name(experiment_name)
+    experiment.add_tags(["burrowl", "birdnet", "binary-classification"])
+    return experiment
+
+
+def main(birdnet_results, metadata, single_class, assess_class):
+    """Assess BirdNET.
+
+    Args:
+        birdnet_results (str): Path to the .pkl of the aggregated
+            BirdNET results.
+        metadata (str): Path to the metadata.csv.
+        single_class (bool): False (default) for buow/no_buow, True
+            for assessing an individual class.
+        assess_class (int): The class number to be assessed when
+            single_class is True.
+    """
+    print("Starting")
+    experiment = create_comet_exp()
+    print("Aggregating BirdNET results.")
+    birdnet_df = organize_birdnet_output(birdnet_results)
+    print(f"Aggregated {len(birdnet_df)} BirdNET results.")
+    print("Matching ground truth labels to BirdNET results.")
+    merged_data = merge_metadata(metadata, birdnet_df)
+    print("Comparing BirdNET labels to ground truth.")
+    if single_class is False:
+        print("Doing binary buow/no_buow assessment")
+        y_true, y_pred = map_binary_labels(merged_data)
+    else:
+        print(f"Assessing performance of BirdNET on class: {assess_class}")
+        y_true, y_pred = map_class_labels(merged_data, assess_class)
+    assess_birdnet(y_true, y_pred, experiment=experiment)
+
+
+if __name__ == '__main__':
+    PARSER = argparse.ArgumentParser(
+        description='Assess BirdNET results against buowset ground truth.'
+    )
+    PARSER.add_argument('birdnet_results',
+                        type=str,
+                        help='Path to the aggregated BirdNET results .pkl.')
+    PARSER.add_argument('metadata',
+                        type=str,
+                        help='Path to buowset metadata file.')
+    PARSER.add_argument('--single_class', action='store_true',
+                        help='Assess an individual class instead of '
+                             'binary buow/no_buow.')
+    PARSER.add_argument('--assess_class', default=None, type=int,
+                        help='Class number to assess when --single_class '
+                             'is set.')
+    ARGS = PARSER.parse_args()
+    main(ARGS.birdnet_results,
+         ARGS.metadata,
+         ARGS.single_class,
+         ARGS.assess_class)
diff --git a/assess_birdnet/tools/README.md b/assess_birdnet/tools/README.md
new file mode 100644
index 0000000..cddc285
--- /dev/null
+++ b/assess_birdnet/tools/README.md
@@ -0,0 +1,9 @@
+Older tools to assess BirdNET against the human-labeled burrowing owl data from 2017-2018.
+
+These tools assess BirdNET by splitting all audio into 3s chunks, regardless of where
+a labeled detection occurred, and add the human labels onto the 3s chunks after this
+chunking occurs, whenever the detection window has ANY overlap with a 3s segment. They
+then compare this, on an individual wav file basis, to the BirdNET results for the
+same data, as sketched below.
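+A minimal sketch of the overlap rule (a hypothetical helper, not the exact
+tool code):
+
+```python
+def mark_overlapping_chunks(start_time, end_time, labels):
+    """Mark every 3s chunk that a labeled interval touches."""
+    start_chunk = int(start_time // 3)
+    end_chunk = int(end_time // 3)
+    for chunk in range(start_chunk, end_chunk + 1):
+        labels[chunk] = 1
+    return labels
+
+# A detection from 4.2s to 7.5s touches chunks 1 (3-6s) and 2 (6-9s),
+# so both chunks inherit the label.
+print(mark_overlapping_chunks(4.2, 7.5, [0] * 4))  # [0, 1, 1, 0]
+```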
+We have since moved on to assessing BirdNET on Buowset, the dataset created
+out of our human labeled burrowing owl data.
diff --git a/assess_birdnet/aggregate_audio_analysis.py b/assess_birdnet/tools/aggregate_split_bn_audio.py
similarity index 100%
rename from assess_birdnet/aggregate_audio_analysis.py
rename to assess_birdnet/tools/aggregate_split_bn_audio.py
diff --git a/assess_birdnet/assess_performance.py b/assess_birdnet/tools/assess_performance.py
similarity index 100%
rename from assess_birdnet/assess_performance.py
rename to assess_birdnet/tools/assess_performance.py
diff --git a/assess_birdnet/normalize_birdnet_output.py b/assess_birdnet/tools/normalize_birdnet_output.py
similarity index 100%
rename from assess_birdnet/normalize_birdnet_output.py
rename to assess_birdnet/tools/normalize_birdnet_output.py
diff --git a/assess_birdnet/normalize_scored_output.py b/assess_birdnet/tools/normalize_scored_output.py
similarity index 82%
rename from assess_birdnet/normalize_scored_output.py
rename to assess_birdnet/tools/normalize_scored_output.py
index ffeb2df..c3cb80a 100644
--- a/assess_birdnet/normalize_scored_output.py
+++ b/assess_birdnet/tools/normalize_scored_output.py
@@ -71,8 +71,18 @@ def mark_intervals(row, chunks_df):
     start_chunk = int(start_time // 3)
     end_chunk = int(end_time // 3)
 
-    if row['TOP1MATCH'] != 'null':
-        chunks_df.loc[start_chunk:end_chunk, 'Label'] = 'yes'
+    manual_id = str(row['MANUAL ID*']).lower()
+
+    if manual_id == 'cluck':
+        chunks_df.loc[start_chunk:end_chunk, 'Label'] = '0'
+    elif manual_id == 'coocoo':
+        chunks_df.loc[start_chunk:end_chunk, 'Label'] = '1'
+    elif manual_id == 'twitter':
+        chunks_df.loc[start_chunk:end_chunk, 'Label'] = '2'
+    elif manual_id == 'alarm':
+        chunks_df.loc[start_chunk:end_chunk, 'Label'] = '3'
+    elif manual_id == 'chick begging':
+        chunks_df.loc[start_chunk:end_chunk, 'Label'] = '4'
 
 
 if __name__ == '__main__':