This repository was archived by the owner on Sep 18, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 40
Add ability to visualize evaluation results #89
Draft
SamDewriter
wants to merge
24
commits into
facebookresearch:main
Choose a base branch
from
SamDewriter:visualize_feature
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
Changes from all commits
Commits
Show all changes
24 commits
Select commit
Hold shift + click to select a range
7dddb93
Testing Circleci on main
1527591
Testing Circleci on main
e31714b
Testing Circleci on main
a5a007e
Testing Circleci on main
d5890da
Testing Circleci on main
54936d2
correct Circle config
2352d22
correct Circle config
2ea30a9
correct Circle config
4377457
correct Circle config
6b646fc
Merge branch 'main' of https://github.com/SamDewriter/SimulEval
6dd12b9
Add visualize functionality
7d434d9
Ignore
9d42a66
Lint with Black
fbe6b95
Update examples/speech_to_text/visualize.py
SamDewriter a58bb51
Return error for files with no headers
170fb61
Return error for files with no headers
8505bf8
Format with black
5fbd227
Add visualize argument
7ca184d
Add visualize argument
2347380
Add visualization script to utils
d3a8649
Remove commented code
1d6c57b
Reformat with Black
38f8cb0
Remove circle
82fa90c
Revert back to initial
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -139,4 +139,7 @@ cython_debug/ | |
| .vscode | ||
|
|
||
| # Mac files | ||
| .DS_Store | ||
| .DS_Store | ||
|
|
||
| output | ||
| exp.ipynb | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| source_type: speech | ||
| target_type: speech |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,92 @@ | ||
| import os | ||
| import pandas as pd | ||
| import re | ||
| import argparse | ||
| from pprint import pprint | ||
|
|
||
|
|
||
def read_scores_from_folder(folder_path, file_pattern=r"scores\.tsv$"):
    """Return the non-empty lines of the first scores file found in a folder.

    Args:
        folder_path: Directory to search (its entries only, not recursive).
        file_pattern: Regular expression searched against each entry name.

    Returns:
        The stripped, non-empty lines of the first matching regular file
        (entries are visited in sorted name order), or ``None`` when no
        regular file matches.
    """
    pattern = re.compile(file_pattern)

    # Sort so the chosen file does not depend on the file system's listing order.
    for name in sorted(os.listdir(folder_path)):
        if not pattern.search(name):
            continue
        score_file_path = os.path.join(folder_path, name)
        # A directory whose name matches the pattern cannot be opened as a file.
        if not os.path.isfile(score_file_path):
            continue
        with open(score_file_path, "r") as f:
            return [line.strip() for line in f if line.strip()]
    return None
|
|
||
|
|
||
def read_scores_files(output_folder, file_pattern=r"scores\.tsv$"):
    """Collect the scores-file contents of every sub-folder of ``output_folder``.

    Args:
        output_folder: Directory whose immediate sub-directories are searched.
        file_pattern: Regular expression forwarded to
            ``read_scores_from_folder`` to pick the scores file.

    Returns:
        A tuple ``(all_contents, headers_list)``. ``all_contents`` holds one
        list of lines for each sub-folder that yielded a non-empty result;
        ``headers_list`` holds the whitespace-split first line of each of
        those lists, in the same order.

    Raises:
        ValueError: If ``output_folder`` is not an existing directory, or if
            the first line of a scores file splits into no header fields.
    """
    if not os.path.isdir(output_folder):
        raise ValueError("Output folder does not exist")

    output_folder = os.path.abspath(output_folder)

    all_contents = []
    # Sort so the row order of the results is reproducible across runs.
    for folder in sorted(os.listdir(output_folder)):
        folder_path = os.path.join(output_folder, folder)
        if not os.path.isdir(folder_path):
            continue
        contents = read_scores_from_folder(folder_path, file_pattern)
        if contents:
            all_contents.append(contents)

    headers_list = []
    for contents in all_contents:
        header = contents[0].split()
        if not header:
            raise ValueError(f"Empty header in {contents}")
        headers_list.append(header)

    return all_contents, headers_list
|
|
||
|
|
||
def process_result(output_folder, metric_names):
    """Build a table of the requested metrics from every scores file found.

    Args:
        output_folder: Directory whose sub-folders contain the scores files.
        metric_names: Metric names to keep, or ``None`` to keep every metric
            listed in the header of the first scores file.

    Returns:
        A ``pandas.DataFrame`` with one row per scores file and one column per
        requested metric that appears in the first file's header. Cell values
        are the strings read from the files; values missing from a file are
        the string ``"NaN"``. When exactly one metric is selected, rows whose
        value for it is numerically zero are dropped.

    Raises:
        ValueError: If no scores file is found, if none of the requested
            metrics appears in the first file's header, or if a scores file
            has a header line but no values line.
    """
    all_contents, headers_list = read_scores_files(output_folder)

    if not headers_list:
        raise ValueError("No scores files found in the output folder")

    # The header of the first scores file is the reference set of metrics.
    reference_header = headers_list[0]

    if metric_names is None:
        metric_names = reference_header
    common_metrics = set(metric_names).intersection(reference_header)

    if not common_metrics:
        raise ValueError("No common metrics found in the results")

    # Pair each header field with the value on the second line of its file.
    scores = []
    for contents in all_contents:
        if not contents:
            continue
        if len(contents) < 2:
            raise ValueError(f"Missing score values in {contents}")
        scores.append(dict(zip(contents[0].split(), contents[1].split())))

    df = pd.DataFrame(scores)

    # Metrics absent from a file show up as missing; display them as "NaN".
    df = df.fillna("NaN")
    filtered_df = df[df.columns[df.columns.isin(common_metrics)]]

    if len(common_metrics) == 1:
        metric_name = next(iter(common_metrics))
        # Cells are strings, so compare numerically; a direct comparison with
        # 0.0 would never match. Non-numeric cells become NaN and are kept.
        numeric_values = pd.to_numeric(filtered_df[metric_name], errors="coerce")
        filtered_df = filtered_df[numeric_values != 0.0]

    return filtered_df
|
|
||
|
|
||
if __name__ == "__main__":
    # Command-line entry point: print the selected metrics for every scores
    # file found under the given output directory.
    parser = argparse.ArgumentParser()
    # Required: without a directory, process_result would be handed None and
    # fail with a TypeError when checking that the folder exists.
    parser.add_argument("--output", type=str, required=True, help="Output directory")
    parser.add_argument(
        "--metrics", type=str, nargs="+", default=None, help="Metrics to be extracted"
    )
    args = parser.parse_args()

    df = process_result(args.output, args.metrics)
    pprint(df)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,77 @@ | ||
| import os | ||
| import pandas as pd | ||
| import re | ||
|
|
||
|
|
||
def read_scores_from_folder(folder_path, file_pattern=r"scores\.tsv$"):
    """Return the non-empty lines of the first scores file found in a folder.

    Args:
        folder_path: Directory to search (its entries only, not recursive).
        file_pattern: Regular expression searched against each entry name.

    Returns:
        The stripped, non-empty lines of the first matching regular file
        (entries are visited in sorted name order), or ``None`` when no
        regular file matches.
    """
    pattern = re.compile(file_pattern)

    # Sort so the chosen file does not depend on the file system's listing order.
    for name in sorted(os.listdir(folder_path)):
        if not pattern.search(name):
            continue
        score_file_path = os.path.join(folder_path, name)
        # A directory whose name matches the pattern cannot be opened as a file.
        if not os.path.isfile(score_file_path):
            continue
        with open(score_file_path, "r") as f:
            return [line.strip() for line in f if line.strip()]
    return None
|
|
||
|
|
||
def read_scores_files(output_folder, file_pattern=r"scores\.tsv$"):
    """Collect the scores-file contents of every sub-folder of ``output_folder``.

    Args:
        output_folder: Directory whose immediate sub-directories are searched.
        file_pattern: Regular expression forwarded to
            ``read_scores_from_folder`` to pick the scores file.

    Returns:
        A tuple ``(all_contents, headers_list)``. ``all_contents`` holds one
        list of lines for each sub-folder that yielded a non-empty result;
        ``headers_list`` holds the whitespace-split first line of each of
        those lists, in the same order.

    Raises:
        ValueError: If ``output_folder`` is not an existing directory, or if
            the first line of a scores file splits into no header fields.
    """
    if not os.path.isdir(output_folder):
        raise ValueError("Output folder does not exist")

    output_folder = os.path.abspath(output_folder)

    all_contents = []
    # Sort so the row order of the results is reproducible across runs.
    for folder in sorted(os.listdir(output_folder)):
        folder_path = os.path.join(output_folder, folder)
        if not os.path.isdir(folder_path):
            continue
        contents = read_scores_from_folder(folder_path, file_pattern)
        if contents:
            all_contents.append(contents)

    headers_list = []
    for contents in all_contents:
        header = contents[0].split()
        if not header:
            raise ValueError(f"Empty header in {contents}")
        headers_list.append(header)

    return all_contents, headers_list
|
|
||
|
|
||
def process_result(output_folder, metric_names):
    """Build a table of the requested metrics from every scores file found.

    Args:
        output_folder: Directory whose sub-folders contain the scores files.
        metric_names: Metric names to keep, or ``None`` to keep every metric
            listed in the header of the first scores file.

    Returns:
        A ``pandas.DataFrame`` with one row per scores file and one column per
        requested metric that appears in the first file's header. Cell values
        are the strings read from the files; values missing from a file are
        the string ``"NaN"``. When exactly one metric is selected, rows whose
        value for it is numerically zero are dropped.

    Raises:
        ValueError: If no scores file is found, if none of the requested
            metrics appears in the first file's header, or if a scores file
            has a header line but no values line.
    """
    all_contents, headers_list = read_scores_files(output_folder)

    if not headers_list:
        raise ValueError("No scores files found in the output folder")

    # The header of the first scores file is the reference set of metrics.
    reference_header = headers_list[0]

    if metric_names is None:
        metric_names = reference_header
    common_metrics = set(metric_names).intersection(reference_header)

    if not common_metrics:
        raise ValueError("No common metrics found in the results")

    # Pair each header field with the value on the second line of its file.
    scores = []
    for contents in all_contents:
        if not contents:
            continue
        if len(contents) < 2:
            raise ValueError(f"Missing score values in {contents}")
        scores.append(dict(zip(contents[0].split(), contents[1].split())))

    df = pd.DataFrame(scores)

    # Metrics absent from a file show up as missing; display them as "NaN".
    df = df.fillna("NaN")
    filtered_df = df[df.columns[df.columns.isin(common_metrics)]]

    if len(common_metrics) == 1:
        metric_name = next(iter(common_metrics))
        # Cells are strings, so compare numerically; a direct comparison with
        # 0.0 would never match. Non-numeric cells become NaN and are kept.
        numeric_values = pd.to_numeric(filtered_df[metric_name], errors="coerce")
        filtered_df = filtered_df[numeric_values != 0.0]

    return filtered_df
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.