From 2ffa8f583527b00c9405a0432f68432fe94cd3d8 Mon Sep 17 00:00:00 2001 From: Katie Garwood Date: Mon, 30 Jun 2025 13:53:54 -0700 Subject: [PATCH 1/8] add pseudocode for scripts to assess birdnet one script to convert the birdnet results for buowset into a reusable pkl for the other script who will be able to assess against the ground truth --- assess_birdnet/aggregate_birdnet_buowset.py | 44 ++++++++++++++++ assess_birdnet/buowset_assess_birdnet.py | 57 +++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 assess_birdnet/aggregate_birdnet_buowset.py create mode 100644 assess_birdnet/buowset_assess_birdnet.py diff --git a/assess_birdnet/aggregate_birdnet_buowset.py b/assess_birdnet/aggregate_birdnet_buowset.py new file mode 100644 index 0000000..0f4f274 --- /dev/null +++ b/assess_birdnet/aggregate_birdnet_buowset.py @@ -0,0 +1,44 @@ +"""Create a dataframe out of birdnet results. + +When you run BirdNET analyze on wav files, it outputs a +result txt file per each wav. We need to aggregate all +of the results into 1 dataframe and saved out so we can +reference it later when we analyze the birdnet results +for buowset. + +Usage: + python3 aggregate_birdnet_buowset.py /path/to/birdnet/ + analyzer/folder/ /path/to/output.pkl +""" + + +def parse_birdnet_analysis(birdnet): + """Create dataframe from individual birdnet result files. + """ + open each txt file in directory and obtain the label for + the segment, and associate it with a key value where the + key is the filename minus the birdnet stuff and then the + label is the value + convert that whole thing to a df. + return birdnet_df + + +def main(birdnet, output): + """Save out birdnet results to a dataframe. 
+ """ + birdnet_df = parse_birdnet_analysis(birdnet) + save birdnet_df as "output".pkl + +if __name__ = '__main__': + parser = argparse.ArgumentParser( + description='Input CSV and model output' + ) + parser.add_argument('birdnet_analysis', + type=str, + help='Path to Birdnet analysis folder.') + parser.add_argument('output', + type=str, + help='Path to desired output for result.') + args = parser.parse_args() + main(args.birdnet_analysis, args.output) + diff --git a/assess_birdnet/buowset_assess_birdnet.py b/assess_birdnet/buowset_assess_birdnet.py new file mode 100644 index 0000000..ac7ea5f --- /dev/null +++ b/assess_birdnet/buowset_assess_birdnet.py @@ -0,0 +1,57 @@ +"""Assess Birdnet performance on Buowset. + +This allows for assessing how well Birdnet performs on +buowset for burrowing owl/no burrowing owl, and for +the individual call types within our labeled data. + +Usage: + python3 buowset_assess_birdnet.py /path/to/birdnet/output/ + /path/to/buowset/metadata.csv +""" + + +def organize_birdnet_output(birdnet_results): + """ + """ + open up the pkl as dataframe + return birdnet_df + +def merge_metadata(metadata, birdnet_df): + """ + """ + open metadata as a df and merge on the filename + with column of the real_label and a column + forthe birdnet label + return merged_data + +def assess_birdnet(merged_data): + """ + """ + create an x of the ground truth labels and a y of the + birdnet labels and just run metrics on them + +def main(birdnet_results, metadata): + """Assess birdnet. 
+ """ + print("Starting") + print*"Aggregating BirdNET results.") + birdnet_df = organize_birdnet_output(birdnet_results) + print(f"Aggregated {len(birdnet_df)} BirdNET results.") + print(f"Matching ground truth labels to BirdNET results.") + merged_data = merge_metadata(metadata, birdnet_df) + print("Comparing BirdNET labels to ground truth.") + assess_birdnet(merged_data) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description='Input Directory Path' + ) + parser.add_argument('birdnet_results', + type=str, + help='Path to Birdnet results for padded buowset.') + parser.add_argument('metadata', + type=str, + help='Path to buowset metadata file.') + args = parser.parse_args() + main(args.birdnet_results, args.metadata) From 87742d1ca11a6b2666f3ce311503b9311ccb96bd Mon Sep 17 00:00:00 2001 From: Katie Garwood Date: Mon, 30 Jun 2025 16:47:39 -0700 Subject: [PATCH 2/8] merges metadata label with birdnet results now have a script that aligns the birdnet result along with the buowset labels in one dataframe so we can easily compare the 2 with the next function i have to add. the data prep is done just need to add in the function to compare the results --- assess_birdnet/aggregate_birdnet_buowset.py | 30 ++++++++++++++++----- assess_birdnet/buowset_assess_birdnet.py | 24 ++++++++++------- 2 files changed, 37 insertions(+), 17 deletions(-) diff --git a/assess_birdnet/aggregate_birdnet_buowset.py b/assess_birdnet/aggregate_birdnet_buowset.py index 0f4f274..8a2c5ba 100644 --- a/assess_birdnet/aggregate_birdnet_buowset.py +++ b/assess_birdnet/aggregate_birdnet_buowset.py @@ -10,16 +10,31 @@ python3 aggregate_birdnet_buowset.py /path/to/birdnet/ analyzer/folder/ /path/to/output.pkl """ +import argparse +import pandas as pd +import glob +import os +import ntpath def parse_birdnet_analysis(birdnet): """Create dataframe from individual birdnet result files. 
""" - open each txt file in directory and obtain the label for - the segment, and associate it with a key value where the - key is the filename minus the birdnet stuff and then the - label is the value - convert that whole thing to a df. + bn_dict = {} + result_files = glob.glob(os.path.join(birdnet, "*.txt")) + for txt_file in result_files: + filename = ntpath.basename(txt_file) + filename = filename.replace("BirdNET.selection.table.txt", "wav") + with open(txt_file, 'r') as f: + header = f.readline().strip().split('\t') + data = pd.read_csv(f, header=None, names=header, delimiter='\t') + if 'burowl' in data['Species Code']: + bn_dict[filename] = 1 + else: + bn_dict[filename] = 0 + print("finished dict") + birdnet_df = pd.DataFrame.from_dict(bn_dict, orient='index', columns=['bn_label']) + birdnet_df.index.name = 'segment' return birdnet_df @@ -27,9 +42,10 @@ def main(birdnet, output): """Save out birdnet results to a dataframe. """ birdnet_df = parse_birdnet_analysis(birdnet) - save birdnet_df as "output".pkl + birdnet_df.to_pickle(output) + print(birdnet_df) -if __name__ = '__main__': +if __name__ == '__main__': parser = argparse.ArgumentParser( description='Input CSV and model output' ) diff --git a/assess_birdnet/buowset_assess_birdnet.py b/assess_birdnet/buowset_assess_birdnet.py index ac7ea5f..8b2f091 100644 --- a/assess_birdnet/buowset_assess_birdnet.py +++ b/assess_birdnet/buowset_assess_birdnet.py @@ -8,39 +8,43 @@ python3 buowset_assess_birdnet.py /path/to/birdnet/output/ /path/to/buowset/metadata.csv """ +import argparse +import pandas as pd def organize_birdnet_output(birdnet_results): """ """ - open up the pkl as dataframe + birdnet_df = pd.read_pickle(birdnet_results) return birdnet_df def merge_metadata(metadata, birdnet_df): """ """ - open metadata as a df and merge on the filename - with column of the real_label and a column - forthe birdnet label - return merged_data + meta = pd.read_csv(metadata, index_col=0) + df_merged = meta.merge(birdnet_df, 
on='segment') + df_merged = df_merged.drop(columns=['segment_duration_s', 'fold']) + + return df_merged def assess_birdnet(merged_data): """ """ - create an x of the ground truth labels and a y of the - birdnet labels and just run metrics on them + #create an x of the ground truth labels and a y of the + #birdnet labels and just run metrics on them def main(birdnet_results, metadata): """Assess birdnet. """ print("Starting") - print*"Aggregating BirdNET results.") + print("Aggregating BirdNET results.") birdnet_df = organize_birdnet_output(birdnet_results) print(f"Aggregated {len(birdnet_df)} BirdNET results.") print(f"Matching ground truth labels to BirdNET results.") merged_data = merge_metadata(metadata, birdnet_df) - print("Comparing BirdNET labels to ground truth.") - assess_birdnet(merged_data) + print(merged_data) + #print("Comparing BirdNET labels to ground truth.") + #assess_birdnet(merged_data) if __name__ == '__main__': From 9c5b7602d111072e60e0dfcd09bbb944144eecbd Mon Sep 17 00:00:00 2001 From: Katie Garwood Date: Tue, 1 Jul 2025 14:53:28 -0700 Subject: [PATCH 3/8] scripts work obtained a confusion matrix for birdnet on buowset. super high precision, super low accuracy. doesnt seem to get false positives but has mannyyyy false negatives. need to add docstrings and a way to put these metrics of birdnet on cometml --- assess_birdnet/aggregate_birdnet_buowset.py | 5 ++++- assess_birdnet/buowset_assess_birdnet.py | 24 +++++++++++++++++---- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/assess_birdnet/aggregate_birdnet_buowset.py b/assess_birdnet/aggregate_birdnet_buowset.py index 8a2c5ba..d920515 100644 --- a/assess_birdnet/aggregate_birdnet_buowset.py +++ b/assess_birdnet/aggregate_birdnet_buowset.py @@ -21,6 +21,7 @@ def parse_birdnet_analysis(birdnet): """Create dataframe from individual birdnet result files. 
""" bn_dict = {} + burowl_count = 0 result_files = glob.glob(os.path.join(birdnet, "*.txt")) for txt_file in result_files: filename = ntpath.basename(txt_file) @@ -28,8 +29,10 @@ def parse_birdnet_analysis(birdnet): with open(txt_file, 'r') as f: header = f.readline().strip().split('\t') data = pd.read_csv(f, header=None, names=header, delimiter='\t') - if 'burowl' in data['Species Code']: + if any(data['Species Code'].str.lower() == 'burowl'): bn_dict[filename] = 1 + burowl_count += 1 + print(f"Found another burrowing owl, new burowl count is {burowl_count}") else: bn_dict[filename] = 0 print("finished dict") diff --git a/assess_birdnet/buowset_assess_birdnet.py b/assess_birdnet/buowset_assess_birdnet.py index 8b2f091..141a186 100644 --- a/assess_birdnet/buowset_assess_birdnet.py +++ b/assess_birdnet/buowset_assess_birdnet.py @@ -10,6 +10,8 @@ """ import argparse import pandas as pd +from sklearn.metrics import confusion_matrix, accuracy_score, precision_score +from sklearn.metrics import recall_score, f1_score def organize_birdnet_output(birdnet_results): @@ -30,8 +32,22 @@ def merge_metadata(metadata, birdnet_df): def assess_birdnet(merged_data): """ """ - #create an x of the ground truth labels and a y of the - #birdnet labels and just run metrics on them + y_true = merged_data['label'].map({0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 0}).values + y_pred = merged_data['bn_label'].values + + confusion_m = confusion_matrix(y_true, y_pred) + accuracy = accuracy_score(y_true, y_pred) + precision = precision_score(y_true, y_pred) + recall = recall_score(y_true, y_pred) + f1_result = f1_score(y_true, y_pred) + + print("Confusion Matrix:") + print(confusion_m) + print(f"Accuracy: {accuracy:.4f}") + print(f"Precision: {precision:.4f}") + print(f"Recall: {recall:.4f}") + print(f"F1 Score: {f1_result:.4f}") + def main(birdnet_results, metadata): """Assess birdnet. 
@@ -43,8 +59,8 @@ def main(birdnet_results, metadata): print(f"Matching ground truth labels to BirdNET results.") merged_data = merge_metadata(metadata, birdnet_df) print(merged_data) - #print("Comparing BirdNET labels to ground truth.") - #assess_birdnet(merged_data) + print("Comparing BirdNET labels to ground truth.") + assess_birdnet(merged_data) if __name__ == '__main__': From 8293a791e34148f8a6cfcb1db5b1caae7c8cd111 Mon Sep 17 00:00:00 2001 From: Katie Garwood Date: Thu, 3 Jul 2025 15:12:59 -0700 Subject: [PATCH 4/8] add comet stuff and ability to assess on a single class this code allows you to obtain the performance metrics for birdnet against a ground truth dataset (specifically buowset) as a binary model, and also allows you to specify a certain vocalization class to isolate the performance metrics for. it also allows you to post these results to comet --- assess_birdnet/buowset_assess_birdnet.py | 84 ++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 7 deletions(-) diff --git a/assess_birdnet/buowset_assess_birdnet.py b/assess_birdnet/buowset_assess_birdnet.py index 141a186..880f6c8 100644 --- a/assess_birdnet/buowset_assess_birdnet.py +++ b/assess_birdnet/buowset_assess_birdnet.py @@ -11,7 +11,9 @@ import argparse import pandas as pd from sklearn.metrics import confusion_matrix, accuracy_score, precision_score -from sklearn.metrics import recall_score, f1_score +from sklearn.metrics import recall_score, f1_score +from comet_ml import Experiment +import random def organize_birdnet_output(birdnet_results): @@ -29,12 +31,47 @@ def merge_metadata(metadata, birdnet_df): return df_merged -def assess_birdnet(merged_data): - """ +def map_binary_labels(merged_data): + """Obtain the two dataframes for the predicted and true labels. 
+ + Args: """ y_true = merged_data['label'].map({0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 0}).values y_pred = merged_data['bn_label'].values + return y_true, y_pred + + +def map_class_labels(merged_data, assess_class): + """ + """ + class_only = merged_data[merged_data['label'] == assess_class] + num_rows = len(class_only) - 1 + no_buow_only = merged_data[merged_data['label'] == 5] + num_no_buow_rows = len(no_buow_only) - 1 + available_numbers = list(range(0, num_rows)) + available_indexes = list(range(0, num_no_buow_rows)) + index_no_buow = [] + index = 0 + while available_numbers: + selected_number = random.choice(available_indexes) + index_no_buow.append(selected_number) + available_indexes.remove(selected_number) + available_numbers.remove(index) + index += 1 + no_buow_subset = no_buow_only.iloc[index_no_buow] + merged = pd.concat([no_buow_subset, class_only], ignore_index=True) + y_true = merged['label'].map({assess_class: 1, 5: 0}).values + y_pred = merged['bn_label'].values + + return y_true, y_pred + + +def assess_birdnet(y_true, y_pred, experiment=None): + """Assess Birdnet against ground truth labels. 
+ + Args: + """ confusion_m = confusion_matrix(y_true, y_pred) accuracy = accuracy_score(y_true, y_pred) precision = precision_score(y_true, y_pred) @@ -48,19 +85,48 @@ def assess_birdnet(merged_data): print(f"Recall: {recall:.4f}") print(f"F1 Score: {f1_result:.4f}") + if experiment: + experiment.log_metric("accuracy", accuracy) + experiment.log_metric("precision", precision) + experiment.log_metric("recall", recall) + experiment.log_metric("f1_score", f1_result) + experiment.log_confusion_matrix(matrix=confusion_m.tolist(), + labels=["No Detection", "Detection"]) + + +def create_comet_exp(): + """ + """ + project = input("Enter the comet project name you'd like this experiment to have/be associated with: ") + work_space = input("Enter the comet workspace (username or organization) this experiment will go in: ") + experiment_name = input("Enter the name of this experiment: ") + experiment = Experiment( + project_name=project, + workspace=work_space + ) + experiment.set_name(experiment_name) + experiment.add_tags(["burrowl", "birdnet", "binary-classification"]) + + return experiment -def main(birdnet_results, metadata): +def main(birdnet_results, metadata, not_binary, assess_class): """Assess birdnet. 
""" print("Starting") + experiment = create_comet_exp() print("Aggregating BirdNET results.") birdnet_df = organize_birdnet_output(birdnet_results) print(f"Aggregated {len(birdnet_df)} BirdNET results.") print(f"Matching ground truth labels to BirdNET results.") merged_data = merge_metadata(metadata, birdnet_df) - print(merged_data) print("Comparing BirdNET labels to ground truth.") - assess_birdnet(merged_data) + if not_binary == True: + print("Doing binary buow/no_buow assessment") + y_true, y_pred = map_binary_labels(merged_data) + else: + print(f"Assessing performance of Birdnet on vocalization: {assess_class}") + y_true, y_pred = map_class_labels(merged_data, assess_class) + assess_birdnet(y_true, y_pred, experiment=experiment) if __name__ == '__main__': @@ -73,5 +139,9 @@ def main(birdnet_results, metadata): parser.add_argument('metadata', type=str, help='Path to buowset metadata file.') + parser.add_argument('-not_binary', action='store_false', + help='Default true binary assessment, call for individual class assessment.') + parser.add_argument('-assess_class', default=None, type=int, + help='Which class would you like to assess individually?') args = parser.parse_args() - main(args.birdnet_results, args.metadata) + main(args.birdnet_results, args.metadata, args.not_binary, args.assess_class) From f9a9a1497e7292fe4d85356f935af7e6b43bb8dc Mon Sep 17 00:00:00 2001 From: Katie Garwood Date: Thu, 3 Jul 2025 15:33:55 -0700 Subject: [PATCH 5/8] moving around some old files and adding a little readme for each --- assess_birdnet/README.md | 25 +++++++++++++++++++ assess_birdnet/tools/README.md | 9 +++++++ .../aggregate_split_bn_audio.py} | 0 .../{ => tools}/assess_performance.py | 0 .../{ => tools}/normalize_birdnet_output.py | 0 .../{ => tools}/normalize_scored_output.py | 14 +++++++++-- 6 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 assess_birdnet/README.md create mode 100644 assess_birdnet/tools/README.md rename 
assess_birdnet/{aggregate_audio_analysis.py => tools/aggregate_split_bn_audio.py} (100%) rename assess_birdnet/{ => tools}/assess_performance.py (100%) rename assess_birdnet/{ => tools}/normalize_birdnet_output.py (100%) rename assess_birdnet/{ => tools}/normalize_scored_output.py (82%) diff --git a/assess_birdnet/README.md b/assess_birdnet/README.md new file mode 100644 index 0000000..bb27659 --- /dev/null +++ b/assess_birdnet/README.md @@ -0,0 +1,25 @@ +Tools to assess BirdNET performance on Buowset + +First, one must pad the buowset detections that are shorter than 3s, to be +a minimum of 3s. And then run BirdNET analyze over the entire dataset with +the desired confidence thresholding and with burrowing owl as the only class +in the species list. + +Running aggregate_birdnet_buowset.py with the path to the BirdNET results and +a .pkl file to send the result to will create a dataframe with the name of the +wav file and a 0 for no buow and a 1 for yes buow detected by BirdNET. + +Then running buowset_assess_birdnet.py with the aforementioned .pkl, the +metadata file for buowset, and some optional paramters, you can compare the +performance of BirdNET against the ground truth labels of buowset. By adding +the optional arguments, you go from comparing BirdNET as a burrowing owl/ +no burrowing owl detector to assessing the BirdNET performance on a class by +class basis. If you select to assess for the 'Coocoo' class for example, +it will aggregate all coocoo instances based on ground truth, and obtain +an equal amount of randomly selected no_buow samples, and generate a confusion +matrix comparing if BirdNET marked the instances of that class as burrowing owl. + +Because BirdNET is a binary classifier for burrowing owl in this data, a class +by class comparison only tells us if it disproportionately misses certain calls +more than others when looking for burrowing owls in general, ie it gives us a peak +into the likely call distribution of their training data. 
diff --git a/assess_birdnet/tools/README.md new file mode 100644 index 0000000..cddc285 --- /dev/null +++ b/assess_birdnet/tools/README.md @@ -0,0 +1,9 @@ +Older tools to assess birdnet against the human labeled burrowing owl data from 2017-2018. + +These tools assess birdnet by splitting all audio into 3s chunks, regardless of where +a labeled detection occurred, and adds the human labels onto the 3s chunks after this +chunking occurs, if the detection window has ANY overlap with a 3s segment. It then +compares this on an individual wav file basis to the birdnet results for the same data. + +We have since moved on to assessing BirdNET on Buowset, the dataset created out of our +human labeled burrowing owl data. diff --git a/assess_birdnet/aggregate_audio_analysis.py b/assess_birdnet/tools/aggregate_split_bn_audio.py similarity index 100% rename from assess_birdnet/aggregate_audio_analysis.py rename to assess_birdnet/tools/aggregate_split_bn_audio.py diff --git a/assess_birdnet/assess_performance.py b/assess_birdnet/tools/assess_performance.py similarity index 100% rename from assess_birdnet/assess_performance.py rename to assess_birdnet/tools/assess_performance.py diff --git a/assess_birdnet/normalize_birdnet_output.py b/assess_birdnet/tools/normalize_birdnet_output.py similarity index 100% rename from assess_birdnet/normalize_birdnet_output.py rename to assess_birdnet/tools/normalize_birdnet_output.py diff --git a/assess_birdnet/normalize_scored_output.py b/assess_birdnet/tools/normalize_scored_output.py similarity index 82% rename from assess_birdnet/normalize_scored_output.py rename to assess_birdnet/tools/normalize_scored_output.py index ffeb2df..c3cb80a 100644 --- a/assess_birdnet/normalize_scored_output.py +++ b/assess_birdnet/tools/normalize_scored_output.py @@ -71,8 +71,18 @@ def mark_intervals(row, chunks_df): start_chunk = int(start_time // 3) end_chunk = int(end_time // 3) - if row['TOP1MATCH'] != 'null': - 
chunks_df.loc[start_chunk:end_chunk, 'Label'] = 'yes' + row_lower['MANUAL ID*'] = row['MANUAL ID*'].str.lower() + + if row_lower['MANUAL ID*'] == 'cluck': + chunks_df.loc[start_chunk:end_chunk, 'Label'] = '0' + elif row_lower['MANUAL ID*'] == 'coocoo': + chunks_df.loc[start_chunk:end_chunk, 'Label'] = '1' + elif row_lower['MANUAL ID*'] == 'twitter': + chunks_df.loc[start_chunk:end_chunk, 'Label'] = '2' + elif row_lower['MANUAL ID*'] == 'alarm': + chunks_df.loc[start_chunk:end_chunk, 'Label'] = '3' + elif row_lower['MANUAL ID*'] == 'chick begging': + chunks_df.loc[start_chunk:end_chunk, 'Label'] = '4' if __name__ == '__main__': From 152f377986212a9266e130f9eb10d34fc08b8d9f Mon Sep 17 00:00:00 2001 From: Katie Garwood Date: Thu, 10 Jul 2025 15:23:02 -0700 Subject: [PATCH 6/8] updating some readme stuff --- assess_birdnet/buowset_assess_birdnet.py | 48 ++++++++++++++++++++---- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/assess_birdnet/buowset_assess_birdnet.py b/assess_birdnet/buowset_assess_birdnet.py index 880f6c8..1bd90d9 100644 --- a/assess_birdnet/buowset_assess_birdnet.py +++ b/assess_birdnet/buowset_assess_birdnet.py @@ -17,13 +17,25 @@ def organize_birdnet_output(birdnet_results): - """ + """Open the birdnet results file. + + Args: + birdnet_results (string): Path to the .pkl aggregated results. + + Returns: + pd.DataFrame """ birdnet_df = pd.read_pickle(birdnet_results) return birdnet_df def merge_metadata(metadata, birdnet_df): - """ + """Combine metadata and birdnet results by segment. + + Args: + metadata (string): Path to metadata file. + birdnet_df (pd.DataFrame): The birdnet results file. + Returns: + pd.DataFrame """ meta = pd.read_csv(metadata, index_col=0) df_merged = meta.merge(birdnet_df, on='segment') @@ -43,7 +55,12 @@ def map_binary_labels(merged_data): def map_class_labels(merged_data, assess_class): - """ + """Create binary class vs no buow assessment for 1 class. 
+ + Args: + merged_data (pd.DataFrame): The + assess_class (int): The number associated with the specific + vocalization type to be assessed. """ class_only = merged_data[merged_data['label'] == assess_class] num_rows = len(class_only) - 1 @@ -71,6 +88,9 @@ def assess_birdnet(y_true, y_pred, experiment=None): """Assess Birdnet against ground truth labels. Args: + y_true (pd.DataFrame): + y_pred (pd.DataFrame): + experiment: """ confusion_m = confusion_matrix(y_true, y_pred) accuracy = accuracy_score(y_true, y_pred) @@ -95,7 +115,10 @@ def assess_birdnet(y_true, y_pred, experiment=None): def create_comet_exp(): - """ + """Create the comet experiment settings. + + Returns: + () """ project = input("Enter the comet project name you'd like this experiment to have/be associated with: ") work_space = input("Enter the comet workspace (username or organization) this experiment will go in: ") @@ -109,8 +132,17 @@ def create_comet_exp(): return experiment -def main(birdnet_results, metadata, not_binary, assess_class): +def main(birdnet_results, metadata, single_class, assess_class): """Assess birdnet. + + Args: + birdnet_results (string): Path to the .pkl of the aggregated + birdnet results. + metadata (string): Path to the metadata.csv + single_class (bool): Default false for buow/no buow, true for + assessing an individual class. + assess_class (int): The class number to be assessed if single_class + called true. 
""" print("Starting") experiment = create_comet_exp() @@ -120,7 +152,7 @@ def main(birdnet_results, metadata, not_binary, assess_class): print(f"Matching ground truth labels to BirdNET results.") merged_data = merge_metadata(metadata, birdnet_df) print("Comparing BirdNET labels to ground truth.") - if not_binary == True: + if single_class == False: print("Doing binary buow/no_buow assessment") y_true, y_pred = map_binary_labels(merged_data) else: @@ -139,9 +171,9 @@ def main(birdnet_results, metadata, not_binary, assess_class): parser.add_argument('metadata', type=str, help='Path to buowset metadata file.') - parser.add_argument('-not_binary', action='store_false', + parser.add_argument('-single_class', action='store_true', help='Default true binary assessment, call for individual class assessment.') parser.add_argument('-assess_class', default=None, type=int, help='Which class would you like to assess individually?') args = parser.parse_args() - main(args.birdnet_results, args.metadata, args.not_binary, args.assess_class) + main(args.birdnet_results, args.metadata, args.single_class, args.assess_class) From b457c8de884c23068e78751636fa33b384a096ea Mon Sep 17 00:00:00 2001 From: Katie Garwood Date: Tue, 12 Aug 2025 09:48:46 -0700 Subject: [PATCH 7/8] fixed pylint and flake8 there were some pylint and flake8 errors. This could be turned into something in the whoot package itself but maybe should happen as a refactor after merging to dev because this branch is behind dev and doesn't have the __init__ stuff. 
--- assess_birdnet/README.md | 10 ++--- assess_birdnet/aggregate_birdnet_buowset.py | 36 +++++++++++------ assess_birdnet/buowset_assess_birdnet.py | 45 ++++++++++++--------- 3 files changed, 56 insertions(+), 35 deletions(-) diff --git a/assess_birdnet/README.md b/assess_birdnet/README.md index bb27659..dc08fab 100644 --- a/assess_birdnet/README.md +++ b/assess_birdnet/README.md @@ -1,9 +1,9 @@ Tools to assess BirdNET performance on Buowset -First, one must pad the buowset detections that are shorter than 3s, to be -a minimum of 3s. And then run BirdNET analyze over the entire dataset with -the desired confidence thresholding and with burrowing owl as the only class -in the species list. +Segments must be a minimum of 3s in order to be assessed by Birdnet, buowsetv1.0 +needs padding but buowsetv1.1 are 3s without artificial padding. And then run +BirdNET analyze over the entire dataset with the desired confidence +thresholding and with burrowing owl as the only class in the species list. Running aggregate_birdnet_buowset.py with the path to the BirdNET results and a .pkl file to send the result to will create a dataframe with the name of the @@ -22,4 +22,4 @@ matrix comparing if BirdNET marked the instances of that class as burrowing owl. Because BirdNET is a binary classifier for burrowing owl in this data, a class by class comparison only tells us if it disproportionately misses certain calls more than others when looking for burrowing owls in general, ie it gives us a peak -into the likely call distribution of their training data. +into the likely call distribution of their training data. 
diff --git a/assess_birdnet/aggregate_birdnet_buowset.py b/assess_birdnet/aggregate_birdnet_buowset.py index d920515..4b66105 100644 --- a/assess_birdnet/aggregate_birdnet_buowset.py +++ b/assess_birdnet/aggregate_birdnet_buowset.py @@ -11,14 +11,20 @@ analyzer/folder/ /path/to/output.pkl """ import argparse -import pandas as pd import glob import os import ntpath +import pandas as pd def parse_birdnet_analysis(birdnet): """Create dataframe from individual birdnet result files. + + Args: + birdnet (str): Path to the birdnet results. + + Returns: + pandas.DataFrame: Birdnet results as a single dataframe. """ bn_dict = {} burowl_count = 0 @@ -26,38 +32,44 @@ def parse_birdnet_analysis(birdnet): for txt_file in result_files: filename = ntpath.basename(txt_file) filename = filename.replace("BirdNET.selection.table.txt", "wav") - with open(txt_file, 'r') as f: - header = f.readline().strip().split('\t') - data = pd.read_csv(f, header=None, names=header, delimiter='\t') + with open(txt_file, 'r') as file: + header = file.readline().strip().split('\t') + data = pd.read_csv(file, header=None, names=header, delimiter='\t') if any(data['Species Code'].str.lower() == 'burowl'): bn_dict[filename] = 1 burowl_count += 1 - print(f"Found another burrowing owl, new burowl count is {burowl_count}") + print(f"New burowl count is {burowl_count}") else: bn_dict[filename] = 0 print("finished dict") - birdnet_df = pd.DataFrame.from_dict(bn_dict, orient='index', columns=['bn_label']) + birdnet_df = pd.DataFrame.from_dict(bn_dict, + orient='index', + columns=['bn_label']) birdnet_df.index.name = 'segment' return birdnet_df def main(birdnet, output): """Save out birdnet results to a dataframe. + + Args: + birdnet (str): Path to the birdnet results files. + output (str): Filename for the output pkl. 
""" birdnet_df = parse_birdnet_analysis(birdnet) birdnet_df.to_pickle(output) print(birdnet_df) + if __name__ == '__main__': - parser = argparse.ArgumentParser( + PARSER = argparse.ArgumentParser( description='Input CSV and model output' ) - parser.add_argument('birdnet_analysis', + PARSER.add_argument('birdnet_analysis', type=str, help='Path to Birdnet analysis folder.') - parser.add_argument('output', + PARSER.add_argument('output', type=str, help='Path to desired output for result.') - args = parser.parse_args() - main(args.birdnet_analysis, args.output) - + ARGS = PARSER.parse_args() + main(ARGS.birdnet_analysis, ARGS.output) diff --git a/assess_birdnet/buowset_assess_birdnet.py b/assess_birdnet/buowset_assess_birdnet.py index 1bd90d9..0983ca0 100644 --- a/assess_birdnet/buowset_assess_birdnet.py +++ b/assess_birdnet/buowset_assess_birdnet.py @@ -9,11 +9,11 @@ /path/to/buowset/metadata.csv """ import argparse +import random import pandas as pd -from sklearn.metrics import confusion_matrix, accuracy_score, precision_score -from sklearn.metrics import recall_score, f1_score +from sklearn.metrics import confusion_matrix, accuracy_score +from sklearn.metrics import recall_score, f1_score, precision_score from comet_ml import Experiment -import random def organize_birdnet_output(birdnet_results): @@ -28,6 +28,7 @@ def organize_birdnet_output(birdnet_results): birdnet_df = pd.read_pickle(birdnet_results) return birdnet_df + def merge_metadata(metadata, birdnet_df): """Combine metadata and birdnet results by segment. @@ -43,12 +44,15 @@ def merge_metadata(metadata, birdnet_df): return df_merged + def map_binary_labels(merged_data): """Obtain the two dataframes for the predicted and true labels. 
Args: """ - y_true = merged_data['label'].map({0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 0}).values + y_true = merged_data['label'].map( + {0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 0} + ).values y_pred = merged_data['bn_label'].values return y_true, y_pred @@ -58,7 +62,8 @@ def map_class_labels(merged_data, assess_class): """Create binary class vs no buow assessment for 1 class. Args: - merged_data (pd.DataFrame): The + merged_data (pd.DataFrame): The birdnet label and ground truth + merged on segment name. assess_class (int): The number associated with the specific vocalization type to be assessed. """ @@ -120,8 +125,8 @@ def create_comet_exp(): Returns: () """ - project = input("Enter the comet project name you'd like this experiment to have/be associated with: ") - work_space = input("Enter the comet workspace (username or organization) this experiment will go in: ") + project = input("Enter the comet project name: ") + work_space = input("Enter the comet workspace: ") experiment_name = input("Enter the name of this experiment: ") experiment = Experiment( project_name=project, @@ -132,6 +137,7 @@ def create_comet_exp(): return experiment + def main(birdnet_results, metadata, single_class, assess_class): """Assess birdnet. 
@@ -152,28 +158,31 @@ def main(birdnet_results, metadata, single_class, assess_class): print(f"Matching ground truth labels to BirdNET results.") merged_data = merge_metadata(metadata, birdnet_df) print("Comparing BirdNET labels to ground truth.") - if single_class == False: + if single_class is False: print("Doing binary buow/no_buow assessment") y_true, y_pred = map_binary_labels(merged_data) else: - print(f"Assessing performance of Birdnet on vocalization: {assess_class}") + print(f"Assessing performance of Birdnet on: {assess_class}") y_true, y_pred = map_class_labels(merged_data, assess_class) assess_birdnet(y_true, y_pred, experiment=experiment) if __name__ == '__main__': - parser = argparse.ArgumentParser( + PARSER = argparse.ArgumentParser( description='Input Directory Path' ) - parser.add_argument('birdnet_results', + PARSER.add_argument('birdnet_results', type=str, help='Path to Birdnet results for padded buowset.') - parser.add_argument('metadata', + PARSER.add_argument('metadata', type=str, help='Path to buowset metadata file.') - parser.add_argument('-single_class', action='store_true', - help='Default true binary assessment, call for individual class assessment.') - parser.add_argument('-assess_class', default=None, type=int, - help='Which class would you like to assess individually?') - args = parser.parse_args() - main(args.birdnet_results, args.metadata, args.single_class, args.assess_class) + PARSER.add_argument('-single_class', action='store_true', + help='Call for individual class assessment.') + PARSER.add_argument('-assess_class', default=None, type=int, + help='Which class would you like to assess?') + ARGS = PARSER.parse_args() + main(ARGS.birdnet_results, + ARGS.metadata, + ARGS.single_class, + ARGS.assess_class) From 68c3c384f21d1973cf1bff2b35823210ff2761da Mon Sep 17 00:00:00 2001 From: Katie Garwood Date: Wed, 24 Sep 2025 10:16:46 -0700 Subject: [PATCH 8/8] add instructions for replicating birdnet usage --- assess_birdnet/README.md | 11 
+++++++++++ 1 file changed, 11 insertions(+) diff --git a/assess_birdnet/README.md b/assess_birdnet/README.md index dc08fab..73b382f 100644 --- a/assess_birdnet/README.md +++ b/assess_birdnet/README.md @@ -5,6 +5,17 @@ needs padding but buowsetv1.1 are 3s without artificial padding. And then run BirdNET analyze over the entire dataset with the desired confidence thresholding and with burrowing owl as the only class in the species list. +To run Birdnet over your dataset, follow the instructions in this repo: +https://birdnet-team.github.io/BirdNET-Analyzer/usage/cli.html#birdnet-analyzer-analyze + +We created our own class list with only our species of interest and ran +birdnet_analyzer.analyze over the entire dataset, beginning with default +confidence and sensitivity values. You can adjust these values and rerun +to obtain a comparison of performance across different confidence thresholds +and sensitivity. Birdnet will give you a text file result for each audio file +in your dataset; we had these text files saved to the same directory as the +audio. + Running aggregate_birdnet_buowset.py with the path to the BirdNET results and a .pkl file to send the result to will create a dataframe with the name of the wav file and a 0 for no buow and a 1 for yes buow detected by BirdNET.