diff --git a/.gitignore b/.gitignore
index 18eee607..2c2f6d1a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -139,4 +139,7 @@ cython_debug/
 .vscode

 # Mac files
-.DS_Store
\ No newline at end of file
+.DS_Store
+
+output
+exp.ipynb
\ No newline at end of file
diff --git a/examples/speech_to_text/output/config.yaml b/examples/speech_to_text/output/config.yaml
new file mode 100644
index 00000000..76d90882
--- /dev/null
+++ b/examples/speech_to_text/output/config.yaml
@@ -0,0 +1,2 @@
+source_type: speech
+target_type: speech
diff --git a/examples/speech_to_text/visualize.py b/examples/speech_to_text/visualize.py
new file mode 100644
index 00000000..92f59f3d
--- /dev/null
+++ b/examples/speech_to_text/visualize.py
@@ -0,0 +1,92 @@
+import os
+import pandas as pd
+import re
+import argparse
+from pprint import pprint
+
+
+def read_scores_from_folder(folder_path, file_pattern=r"scores\.tsv$"):
+    file_pattern = re.compile(file_pattern)
+
+    for file in os.listdir(folder_path):
+        if file_pattern.search(file):
+            score_file_path = os.path.join(folder_path, file)
+            # if os.path.isfile(score_file_path):
+            with open(score_file_path, "r") as f:
+                contents = [
+                    line.strip() for line in f.read().split("\n") if line.strip()
+                ]
+            return contents
+    return None
+
+
+def read_scores_files(output_folder, file_pattern=r"scores\.tsv$"):
+    all_contents = []
+
+    if not os.path.isdir(output_folder):
+        raise ValueError("Output folder does not exist")
+
+    output_folder = os.path.abspath(output_folder)
+
+    for folder in os.listdir(output_folder):
+        folder_path = os.path.join(output_folder, folder)
+
+        if os.path.isdir(folder_path):
+            contents = read_scores_from_folder(folder_path, file_pattern)
+            if contents:
+                all_contents.append(contents)
+
+    headers_list = []
+    for contents in all_contents:
+        if contents:
+            header = contents[0].split()
+            if not header:
+                raise ValueError(f"Empty header in {contents}")
+            headers_list.append(header)
+
+    return all_contents, headers_list
+
+
+def process_result(output_folder, metric_names):
+    all_contents, headers_list = read_scores_files(output_folder)
+
+    # Extracting headers from the first line of each "scores.tsv" file
+    reference_header = headers_list[0]
+
+    if metric_names is None:
+        metric_names = reference_header
+    common_metrics = set(metric_names).intersection(reference_header)
+
+    if not common_metrics:
+        raise ValueError("No common metrics found in the results")
+
+    # Extracting scores for each metric
+    scores = []
+    for contents in all_contents:
+        if contents:
+            values = dict(zip(contents[0].split(), contents[1].split()))
+            scores.append(values)
+
+    df = pd.DataFrame(scores)
+
+    # Fill missing values with the string "NaN"
+    df = df.fillna("NaN")
+    filtered_df = df[df.columns[df.columns.isin(common_metrics)]]
+
+    if len(common_metrics) == 1:
+        metric_name = list(common_metrics)[0]
+        filtered_df = filtered_df[filtered_df[metric_name] != 0.0]
+
+    return filtered_df
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--output", type=str, default=None, help="Output directory")
+    parser.add_argument(
+        "--metrics", type=str, nargs="+", default=None, help="Metrics to be extracted"
+    )
+    args = parser.parse_args()
+
+    df = process_result(args.output, args.metrics)
+    pprint(df)
diff --git a/simuleval/cli.py b/simuleval/cli.py
index b54fe756..fa0ec2f2 100644
--- a/simuleval/cli.py
+++ b/simuleval/cli.py
@@ -9,6 +9,7 @@
 from simuleval import options
 from simuleval.utils.agent import build_system_args
 from simuleval.utils.slurm import submit_slurm_job
+from simuleval.utils.visualize import process_result
 from simuleval.utils.arguments import check_argument
 from simuleval.utils import EVALUATION_SYSTEM_LIST
 from simuleval.evaluator import (
@@ -40,6 +41,10 @@ def main():
         scoring()
         return

+    if check_argument("visualize"):
+        visualize()
+        return
+
     if check_argument("slurm"):
         submit_slurm_job()
         return
@@ -99,5 +104,13 @@ def remote_evaluate():
     evaluator.remote_eval()


+def visualize():
+    parser = options.general_parser()
+    options.add_visualize_args(parser)
+    args = parser.parse_args()
+    visualizer = process_result(args.output, args.metrics)
+    print(visualizer)
+
+
 if __name__ == "__main__":
     main()
diff --git a/simuleval/evaluator/evaluator.py b/simuleval/evaluator/evaluator.py
index a0e7e598..1e84db1a 100644
--- a/simuleval/evaluator/evaluator.py
+++ b/simuleval/evaluator/evaluator.py
@@ -7,6 +7,7 @@
 import pandas
 import os
 import numbers
+import datetime
 from argparse import Namespace
 from typing import Dict, Generator, Optional
 from .scorers import get_scorer_class
@@ -213,6 +214,12 @@ def results(self):
         df = pandas.DataFrame(new_scores)
         return df

+    def create_output_dir(self) -> Path:
+        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
+        output_directory = self.output / f"run-{timestamp}"
+        output_directory.mkdir(exist_ok=True, parents=True)
+        return output_directory
+
     def dump_results(self) -> None:
         results = self.results
         if self.output:
@@ -221,6 +228,9 @@ def dump_results(self) -> None:
         logger.info("Results:")
         print(results.to_string(index=False))

+        logger.info("Results:")
+        print(results.to_string(index=False))
+
     def dump_metrics(self) -> None:
         metrics = pandas.DataFrame([ins.metrics for ins in self.instances.values()])
         metrics = metrics.round(3)
diff --git a/simuleval/options.py b/simuleval/options.py
index 4c706e27..27e259ea 100644
--- a/simuleval/options.py
+++ b/simuleval/options.py
@@ -185,6 +185,13 @@ def general_parser():
         "--device", type=str, default="cpu", help="Device to run the model."
     )
     parser.add_argument("--fp16", action="store_true", default=False, help="Use fp16.")
+
+    parser.add_argument(
+        "--visualize",
+        action="store_true",
+        default=False,
+        help="Visualize the results.",
+    )
     return parser


@@ -192,3 +199,19 @@ def add_slurm_args(parser):
     parser.add_argument("--slurm-partition", default="", help="Slurm partition.")
     parser.add_argument("--slurm-job-name", default="simuleval", help="Slurm job name.")
     parser.add_argument("--slurm-time", default="2:00:00", help="Slurm partition.")
+
+
+def add_visualize_args(parser):
+    parser.add_argument(
+        "--output",
+        type=str,
+        default=None,
+        help="Output directory",
+    )
+    parser.add_argument(
+        "--metrics",
+        type=str,
+        nargs="+",
+        default=None,
+        help="Metrics to be extracted",
+    )
diff --git a/simuleval/utils/visualize.py b/simuleval/utils/visualize.py
new file mode 100644
index 00000000..ee4b6330
--- /dev/null
+++ b/simuleval/utils/visualize.py
@@ -0,0 +1,77 @@
+import os
+import pandas as pd
+import re
+
+
+def read_scores_from_folder(folder_path, file_pattern=r"scores\.tsv$"):
+    file_pattern = re.compile(file_pattern)
+
+    for file in os.listdir(folder_path):
+        if file_pattern.search(file):
+            score_file_path = os.path.join(folder_path, file)
+            with open(score_file_path, "r") as f:
+                contents = [
+                    line.strip() for line in f.read().split("\n") if line.strip()
+                ]
+            return contents
+    return None
+
+
+def read_scores_files(output_folder, file_pattern=r"scores\.tsv$"):
+    all_contents = []
+
+    if not os.path.isdir(output_folder):
+        raise ValueError("Output folder does not exist")
+
+    output_folder = os.path.abspath(output_folder)
+
+    for folder in os.listdir(output_folder):
+        folder_path = os.path.join(output_folder, folder)
+
+        if os.path.isdir(folder_path):
+            contents = read_scores_from_folder(folder_path, file_pattern)
+            if contents:
+                all_contents.append(contents)
+
+    headers_list = []
+    for contents in all_contents:
+        if contents:
+            header = contents[0].split()
+            if not header:
+                raise ValueError(f"Empty header in {contents}")
+            headers_list.append(header)
+
+    return all_contents, headers_list
+
+
+def process_result(output_folder, metric_names):
+    all_contents, headers_list = read_scores_files(output_folder)
+
+    # Extracting headers from the first line of each "scores.tsv" file
+    reference_header = headers_list[0]
+
+    if metric_names is None:
+        metric_names = reference_header
+    common_metrics = set(metric_names).intersection(reference_header)
+
+    if not common_metrics:
+        raise ValueError("No common metrics found in the results")
+
+    # Extracting scores for each metric
+    scores = []
+    for contents in all_contents:
+        if contents:
+            values = dict(zip(contents[0].split(), contents[1].split()))
+            scores.append(values)
+
+    df = pd.DataFrame(scores)
+
+    # Fill missing values with the string "NaN"
+    df = df.fillna("NaN")
+    filtered_df = df[df.columns[df.columns.isin(common_metrics)]]
+
+    if len(common_metrics) == 1:
+        metric_name = list(common_metrics)[0]
+        filtered_df = filtered_df[filtered_df[metric_name] != 0.0]
+
+    return filtered_df
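
Usage sketch (not part of the patch): the snippet below shows how the process_result helper added in simuleval/utils/visualize.py could be called directly. It assumes an existing SimulEval output directory named "output" whose run subdirectories each contain a scores.tsv file; the metric names "BLEU" and "AL" are placeholders and must match columns in the scores.tsv header. The same functionality is also reachable through the new --visualize CLI path wired into simuleval/cli.py.

# Hypothetical usage sketch; "output", "BLEU", and "AL" are placeholder values.
from simuleval.utils.visualize import process_result

# Collect the requested metric columns from every scores.tsv found under output/
df = process_result("output", ["BLEU", "AL"])
print(df)

# Passing None selects every metric listed in the first scores.tsv header
print(process_result("output", None))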