5 changes: 4 additions & 1 deletion .gitignore
@@ -139,4 +139,7 @@ cython_debug/
.vscode

# Mac files
.DS_Store
.DS_Store

output
exp.ipynb
2 changes: 2 additions & 0 deletions examples/speech_to_text/output/config.yaml
@@ -0,0 +1,2 @@
source_type: speech
target_type: speech
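The two keys above are all this config carries. A minimal sketch of consuming it, assuming PyYAML is available and that a consumer only needs source_type and target_type; the path is the file added in this diff:

import yaml  # assumes PyYAML is installed

with open("examples/speech_to_text/output/config.yaml") as f:
    config = yaml.safe_load(f)

# For the file above this yields:
# {"source_type": "speech", "target_type": "speech"}
print(config["source_type"], config["target_type"])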
92 changes: 92 additions & 0 deletions examples/speech_to_text/visualize.py
@@ -0,0 +1,92 @@
import os
import pandas as pd
import re
import argparse
from pprint import pprint


def read_scores_from_folder(folder_path, file_pattern=r"scores\.tsv$"):
    file_pattern = re.compile(file_pattern)

    for file in os.listdir(folder_path):
        if file_pattern.search(file):
            score_file_path = os.path.join(folder_path, file)
            with open(score_file_path, "r") as f:
                contents = [
                    line.strip() for line in f.read().split("\n") if line.strip()
                ]
            return contents
    return None


def read_scores_files(output_folder, file_pattern=r"scores\.tsv$"):
    all_contents = []

    if not os.path.isdir(output_folder):
        raise ValueError("Output folder does not exist")

    output_folder = os.path.abspath(output_folder)

    for folder in os.listdir(output_folder):
        folder_path = os.path.join(output_folder, folder)

        if os.path.isdir(folder_path):
            contents = read_scores_from_folder(folder_path, file_pattern)
            if contents:
                all_contents.append(contents)

    headers_list = []
    for contents in all_contents:
        if contents:
            header = contents[0].split()
            if not header:
                raise ValueError(f"Empty header in {contents}")
            headers_list.append(header)

    return all_contents, headers_list


def process_result(output_folder, metric_names):
    all_contents, headers_list = read_scores_files(output_folder)

    # Use the header of the first "scores.tsv" file as the reference metric list
    reference_header = headers_list[0]

    if metric_names is None:
        metric_names = reference_header
    common_metrics = set(metric_names).intersection(reference_header)

    if not common_metrics:
        raise ValueError("No common metrics found in the results")

    # Extract the scores of each run from the value row of its "scores.tsv"
    scores = []
    for contents in all_contents:
        if contents:
            values = dict(zip(contents[0].split(), contents[1].split()))
            scores.append(values)

    df = pd.DataFrame(scores)

    # Replace missing values with the string "NaN" so they display cleanly
    df = df.fillna("NaN")
    filtered_df = df[df.columns[df.columns.isin(common_metrics)]]

    if len(common_metrics) == 1:
        # Drop rows where the single requested metric is zero; compare
        # numerically, since the values were read from the file as strings.
        metric_name = list(common_metrics)[0]
        filtered_df = filtered_df[
            pd.to_numeric(filtered_df[metric_name], errors="coerce") != 0.0
        ]

    return filtered_df


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--output", type=str, default=None, help="Output directory")
    parser.add_argument(
        "--metrics", type=str, nargs="+", default=None, help="Metrics to be extracted"
    )
    args = parser.parse_args()

    df = process_result(args.output, args.metrics)
    pprint(df)
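A minimal usage sketch for the script above, assuming an output directory in which each run subfolder holds a scores.tsv with a header row and a value row; the folder names, metric names, and numbers below are illustrative, not taken from this diff:

# Assumed layout (run from examples/speech_to_text/):
#   output/
#     run-a/scores.tsv  ->  "BLEU LAAL\n25.3 1200.5\n"
#     run-b/scores.tsv  ->  "BLEU LAAL\n26.1 1150.2\n"
from visualize import process_result  # the script added above

# Collect only the BLEU column across all runs into one DataFrame.
df = process_result("output", ["BLEU"])
print(df)
#    BLEU
# 0  25.3
# 1  26.1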
13 changes: 13 additions & 0 deletions simuleval/cli.py
@@ -9,6 +9,7 @@
from simuleval import options
from simuleval.utils.agent import build_system_args
from simuleval.utils.slurm import submit_slurm_job
from simuleval.utils.visualize import process_result
from simuleval.utils.arguments import check_argument
from simuleval.utils import EVALUATION_SYSTEM_LIST
from simuleval.evaluator import (
@@ -40,6 +41,10 @@ def main():
        scoring()
        return

    if check_argument("visualize"):
        visualize()
        return

    if check_argument("slurm"):
        submit_slurm_job()
        return
@@ -99,5 +104,13 @@ def remote_evaluate():
    evaluator.remote_eval()


def visualize():
    parser = options.general_parser()
    options.add_visualize_args(parser)
    args = parser.parse_args()
    visualizer = process_result(args.output, args.metrics)
    print(visualizer)


if __name__ == "__main__":
    main()
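main() dispatches on mode flags and returns early, so --visualize skips evaluation entirely and only prints the collected score table. A rough sketch of that dispatch pattern, assuming check_argument simply looks for the flag on the command line; the real implementation in simuleval.utils.arguments is not shown in this diff and may differ:

import sys

def check_argument_sketch(name: str) -> bool:
    # Hypothetical stand-in for simuleval.utils.arguments.check_argument:
    # report the flag as set if it appears verbatim in sys.argv.
    return f"--{name}" in sys.argv

def main_sketch() -> str:
    if check_argument_sketch("visualize"):
        return "visualize"  # branch added by this PR: print scores and exit
    if check_argument_sketch("slurm"):
        return "slurm"      # submit a Slurm job instead of evaluating locally
    return "evaluate"       # default path: run the evaluator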
10 changes: 10 additions & 0 deletions simuleval/evaluator/evaluator.py
@@ -7,6 +7,7 @@
import pandas
import os
import numbers
import datetime
from argparse import Namespace
from typing import Dict, Generator, Optional
from .scorers import get_scorer_class
@@ -213,6 +214,12 @@ def results(self):
        df = pandas.DataFrame(new_scores)
        return df

    def create_output_dir(self) -> Path:
        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        output_directory = self.output / f"run-{timestamp}"
        output_directory.mkdir(exist_ok=True, parents=True)
        return output_directory

    def dump_results(self) -> None:
        results = self.results
        if self.output:
@@ -221,6 +228,9 @@ def dump_results(self) -> None:
        logger.info("Results:")
        print(results.to_string(index=False))

        logger.info("Results:")
        print(results.to_string(index=False))

    def dump_metrics(self) -> None:
        metrics = pandas.DataFrame([ins.metrics for ins in self.instances.values()])
        metrics = metrics.round(3)
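create_output_dir above follows the common timestamped-run-directory pattern, so repeated evaluations never overwrite each other. A standalone sketch of the same idea, assuming a pathlib.Path base directory; the base name and file contents are illustrative:

import datetime
from pathlib import Path

def create_run_dir(base: Path) -> Path:
    # One sub-directory per run, e.g. output/run-20240101-120000
    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = base / f"run-{timestamp}"
    run_dir.mkdir(exist_ok=True, parents=True)
    return run_dir

run_dir = create_run_dir(Path("output"))
(run_dir / "scores.tsv").write_text("BLEU LAAL\n25.3 1200.5\n")  # illustrative scores file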
23 changes: 23 additions & 0 deletions simuleval/options.py
@@ -185,10 +185,33 @@ def general_parser():
        "--device", type=str, default="cpu", help="Device to run the model."
    )
    parser.add_argument("--fp16", action="store_true", default=False, help="Use fp16.")

    parser.add_argument(
        "--visualize",
        action="store_true",
        default=False,
        help="Visualize the results.",
    )
    return parser


def add_slurm_args(parser):
    parser.add_argument("--slurm-partition", default="", help="Slurm partition.")
    parser.add_argument("--slurm-job-name", default="simuleval", help="Slurm job name.")
    parser.add_argument("--slurm-time", default="2:00:00", help="Slurm time limit.")


def add_visualize_args(parser):
    parser.add_argument(
        "--output",
        type=str,
        default=None,
        help="Output directory",
    )
    parser.add_argument(
        "--metrics",
        type=str,
        nargs="+",
        default=None,
        help="Metrics to be extracted",
    )
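The --output and --metrics options added by add_visualize_args can be exercised on their own with a bare argparse parser; a small sketch, with placeholder metric names:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--output", type=str, default=None, help="Output directory")
parser.add_argument(
    "--metrics", type=str, nargs="+", default=None, help="Metrics to be extracted"
)

args = parser.parse_args(["--output", "output", "--metrics", "BLEU", "LAAL"])
print(args.output)   # -> output
print(args.metrics)  # -> ['BLEU', 'LAAL']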
77 changes: 77 additions & 0 deletions simuleval/utils/visualize.py
@@ -0,0 +1,77 @@
import os
import pandas as pd
import re


def read_scores_from_folder(folder_path, file_pattern=r"scores\.tsv$"):
    file_pattern = re.compile(file_pattern)

    for file in os.listdir(folder_path):
        if file_pattern.search(file):
            score_file_path = os.path.join(folder_path, file)
            with open(score_file_path, "r") as f:
                contents = [
                    line.strip() for line in f.read().split("\n") if line.strip()
                ]
            return contents
    return None


def read_scores_files(output_folder, file_pattern=r"scores\.tsv$"):
    all_contents = []

    if not os.path.isdir(output_folder):
        raise ValueError("Output folder does not exist")

    output_folder = os.path.abspath(output_folder)

    for folder in os.listdir(output_folder):
        folder_path = os.path.join(output_folder, folder)

        if os.path.isdir(folder_path):
            contents = read_scores_from_folder(folder_path, file_pattern)
            if contents:
                all_contents.append(contents)

    headers_list = []
    for contents in all_contents:
        if contents:
            header = contents[0].split()
            if not header:
                raise ValueError(f"Empty header in {contents}")
            headers_list.append(header)

    return all_contents, headers_list


def process_result(output_folder, metric_names):
    all_contents, headers_list = read_scores_files(output_folder)

    # Use the header of the first "scores.tsv" file as the reference metric list
    reference_header = headers_list[0]

    if metric_names is None:
        metric_names = reference_header
    common_metrics = set(metric_names).intersection(reference_header)

    if not common_metrics:
        raise ValueError("No common metrics found in the results")

    # Extract the scores of each run from the value row of its "scores.tsv"
    scores = []
    for contents in all_contents:
        if contents:
            values = dict(zip(contents[0].split(), contents[1].split()))
            scores.append(values)

    df = pd.DataFrame(scores)

    # Replace missing values with the string "NaN" so they display cleanly
    df = df.fillna("NaN")
    filtered_df = df[df.columns[df.columns.isin(common_metrics)]]

    if len(common_metrics) == 1:
        # Drop rows where the single requested metric is zero; compare
        # numerically, since the values were read from the file as strings.
        metric_name = list(common_metrics)[0]
        filtered_df = filtered_df[
            pd.to_numeric(filtered_df[metric_name], errors="coerce") != 0.0
        ]

    return filtered_df
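An end-to-end sketch of process_result on synthetic data, assuming each run folder contains a scores.tsv whose first line is the metric header and whose second line holds the values; the numbers are made up and the row order follows os.listdir:

import tempfile
from pathlib import Path

from simuleval.utils.visualize import process_result

with tempfile.TemporaryDirectory() as tmp:
    for name, row in [("run-a", "25.3 1200.5"), ("run-b", "26.1 1150.2")]:
        run_dir = Path(tmp) / name
        run_dir.mkdir()
        (run_dir / "scores.tsv").write_text(f"BLEU LAAL\n{row}\n")

    # Keep both metrics; pass e.g. ["BLEU"] to select a single column.
    df = process_result(tmp, ["BLEU", "LAAL"])
    print(df)
    #    BLEU    LAAL
    # 0  25.3  1200.5
    # 1  26.1  1150.2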