From 7dddb935b43b655c77694417d1e6c232a3ca09e8 Mon Sep 17 00:00:00 2001 From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com> Date: Wed, 19 Jul 2023 21:28:51 +0100 Subject: [PATCH 01/22] Testing Circleci on main --- .circleci/config.yml | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 .circleci/config.yml diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 00000000..3ea33ffa --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,43 @@ +version: 2.1 +jobs: + # build_test: + # docker: + # - image: cimg/python:3.11.0 + # - run: + # name: Install dependencies + # command: | # create whl and install dependencies + # python3 setup.py sdist bdist_wheel + # sudo add-apt-repository universe -y + # sudo apt-get update + # sudo apt install -y python3-pip + # sudo pip install pipenv + # sudo apt-get install -y python3-pip + # sudo pip3 install pytest + # - run: + # name: Run tests + # command: | + # pytest + test_pypi_publish: + docker: + - image: cimg/python:3.11.0 + steps: + - checkout + - run: + command: | + python3 setup.py sdist bdist_wheel + sudo add-apt-repository universe -y + sudo apt-get update + sudo apt-get install -y python3-pip + sudo pip install pipenv + pipenv install twine + pipenv run twine upload --repository-url testpypi dist/* +workflows: + version: 2 + build_test_publish: + jobs: + - test_pypi_publish: + # requires: + # - build_test + filters: + branches: + only: main From 15275912e8c03c8453744b833223b838e098cfdf Mon Sep 17 00:00:00 2001 From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com> Date: Wed, 19 Jul 2023 21:39:58 +0100 Subject: [PATCH 02/22] Testing Circleci on main --- .circleci/config.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 3ea33ffa..a3250afa 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -18,10 +18,13 @@ jobs: # command: | # pytest test_pypi_publish: + runs-on: ubuntu-latest docker: - image: cimg/python:3.11.0 steps: - - checkout + - name: Checkout code + uses: actions/checkout@v2 + - name: Build and publish to TestPyPI - run: command: | python3 setup.py sdist bdist_wheel From e31714bedd1effec7feb04a91dcf6d6ce03015c3 Mon Sep 17 00:00:00 2001 From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com> Date: Wed, 19 Jul 2023 21:47:25 +0100 Subject: [PATCH 03/22] Testing Circleci on main --- .circleci/config.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index a3250afa..e7981e01 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -19,14 +19,11 @@ jobs: # pytest test_pypi_publish: runs-on: ubuntu-latest - docker: - - image: cimg/python:3.11.0 steps: - name: Checkout code uses: actions/checkout@v2 - name: Build and publish to TestPyPI - run: - command: | python3 setup.py sdist bdist_wheel sudo add-apt-repository universe -y sudo apt-get update From a5a007efa8cb062bcfbd160942dee8a35914d89b Mon Sep 17 00:00:00 2001 From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com> Date: Wed, 19 Jul 2023 21:51:40 +0100 Subject: [PATCH 04/22] Testing Circleci on main --- .circleci/config.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index e7981e01..0fd6de8c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -23,6 +23,10 @@ jobs: - name: Checkout code uses: actions/checkout@v2 - name: Build and publish to TestPyPI + - with: + username: __token__ + password: ${{ secrets.TEST_PYPI_TOKEN }} + repository_url: https://test.pypi.org/legacy/ - run: python3 setup.py sdist bdist_wheel sudo add-apt-repository universe -y From d5890da445ce89c8fb70f31f6fdd3d2cb66585ed Mon Sep 17 00:00:00 2001 From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com> Date: Wed, 19 Jul 2023 22:00:33 +0100 Subject: [PATCH 05/22] Testing Circleci on main --- .circleci/config.yml | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 0fd6de8c..96879761 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,31 +1,13 @@ version: 2.1 jobs: - # build_test: - # docker: - # - image: cimg/python:3.11.0 - # - run: - # name: Install dependencies - # command: | # create whl and install dependencies - # python3 setup.py sdist bdist_wheel - # sudo add-apt-repository universe -y - # sudo apt-get update - # sudo apt install -y python3-pip - # sudo pip install pipenv - # sudo apt-get install -y python3-pip - # sudo pip3 install pytest - # - run: - # name: Run tests - # command: | - # pytest - test_pypi_publish: runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v2 - name: Build and publish to TestPyPI - - with: - username: __token__ - password: ${{ secrets.TEST_PYPI_TOKEN }} + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.TEST_PYPI_TOKEN }} repository_url: https://test.pypi.org/legacy/ - run: python3 setup.py sdist bdist_wheel From 54936d237074b3905b95ca3e96a22178853a28fc Mon Sep 17 00:00:00 2001 From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com> Date: Thu, 20 Jul 2023 09:03:20 +0100 Subject: [PATCH 06/22] correct Circle config --- .circleci/config.yml | 46 ++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 96879761..fe2afa3d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,29 +1,33 @@ -version: 2.1 +name: Build and Publish to TestPyPI +on: + push: + branches: + - main + jobs: + build_test_publish: runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v2 - - name: Build and publish to TestPyPI - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.TEST_PYPI_TOKEN }} - repository_url: https://test.pypi.org/legacy/ - - run: - python3 setup.py sdist bdist_wheel - sudo add-apt-repository universe -y + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.x + + - name: Install dependencies + run: | sudo apt-get update sudo apt-get install -y python3-pip - sudo pip install pipenv + pip install pipenv pipenv install twine - pipenv run twine upload --repository-url testpypi dist/* -workflows: - version: 2 - build_test_publish: - jobs: - - test_pypi_publish: - # requires: - # - build_test - filters: - branches: - only: main + + - name: Build and Publish + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.TEST_PYPI_TOKEN }} + REPOSITORY_URL: https://test.pypi.org/legacy/ + run: | + python setup.py sdist bdist_wheel + pipenv run twine upload --repository-url $REPOSITORY_URL dist/* From 2352d22bc1ef4a19ae2ef43187205f90b784b190 Mon Sep 17 00:00:00 2001 From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com> Date: Thu, 20 Jul 2023 09:08:16 +0100 Subject: [PATCH 07/22] correct Circle config --- .circleci/config.yml | 45 +++++++++++++------------------------------- 1 file changed, 13 insertions(+), 32 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index fe2afa3d..e89e805a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,33 +1,14 @@ -name: Build and Publish to TestPyPI -on: - push: - branches: - - main - -jobs: - build_test_publish: - runs-on: ubuntu-latest +test_pypi_publish: + docker: + - image: cimg/python:3.11.0 steps: - - name: Checkout code - uses: actions/checkout@v2 - - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: 3.x - - - name: Install dependencies - run: | - sudo apt-get update - sudo apt-get install -y python3-pip - pip install pipenv - pipenv install twine - - - name: Build and Publish - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.TEST_PYPI_TOKEN }} - REPOSITORY_URL: https://test.pypi.org/legacy/ - run: | - python setup.py sdist bdist_wheel - pipenv run twine upload --repository-url $REPOSITORY_URL dist/* + - checkout # checkout source code to working directory + - run: + command: | # create whl, install twine and publish to Test PyPI + python3 setup.py sdist bdist_wheel + sudo add-apt-repository universe -y + sudo apt-get update + sudo apt install -y python3-pip + sudo pip install pipenv + pipenv install twine + pipenv run twine upload --repository testpypi dist/* \ No newline at end of file From 2ea30a9c54e4d0b6fdb0383ad9939d4895238bb0 Mon Sep 17 00:00:00 2001 From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com> Date: Thu, 20 Jul 2023 09:13:06 +0100 Subject: [PATCH 08/22] correct Circle config --- .circleci/config.yml | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index e89e805a..2ec034d6 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,14 +1,14 @@ test_pypi_publish: - docker: - - image: cimg/python:3.11.0 - steps: - - checkout # checkout source code to working directory - - run: - command: | # create whl, install twine and publish to Test PyPI - python3 setup.py sdist bdist_wheel - sudo add-apt-repository universe -y - sudo apt-get update - sudo apt install -y python3-pip - sudo pip install pipenv - pipenv install twine - pipenv run twine upload --repository testpypi dist/* \ No newline at end of file + docker: + - image: cimg/python:3.11.0 + steps: + - checkout + - run: + command: | + python3 setup.py sdist bdist_wheel + sudo add-apt-repository universe -y + sudo apt-get update + sudo apt install -y python3-pip + sudo pip install pipenv + pipenv install twine + pipenv run twine upload --repository testpypi dist/* \ No newline at end of file From 4377457d8fe31812c141ed71efd32ccbdf27ea98 Mon Sep 17 00:00:00 2001 From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com> Date: Thu, 20 Jul 2023 09:14:23 +0100 Subject: [PATCH 09/22] correct Circle config --- .circleci/config.yml | 44 ++++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 2ec034d6..f0d9287b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,14 +1,34 @@ -test_pypi_publish: - docker: - - image: cimg/python:3.11.0 - steps: - - checkout - - run: - command: | - python3 setup.py sdist bdist_wheel - sudo add-apt-repository universe -y +name: TestPyPI Publish + +on: + push: + branches: + - main + +jobs: + test_pypi_publish: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.x + + - name: Install dependencies + run: | sudo apt-get update - sudo apt install -y python3-pip - sudo pip install pipenv + sudo apt-get install -y python3-pip + pip install pipenv pipenv install twine - pipenv run twine upload --repository testpypi dist/* \ No newline at end of file + + - name: Build and Publish + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.TEST_PYPI_TOKEN }} + REPOSITORY_URL: https://test.pypi.org/legacy/ + run: | + python setup.py sdist bdist_wheel + pipenv run twine upload --repository-url $REPOSITORY_URL dist/* From 6dd12b9236a07fb382f6af029740651d42c78c9e Mon Sep 17 00:00:00 2001 From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com> Date: Fri, 17 Nov 2023 20:18:36 +0100 Subject: [PATCH 10/22] Add visualize functionality --- examples/speech_to_text/output/config.yaml | 2 + examples/speech_to_text/visualize.py | 74 ++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 examples/speech_to_text/output/config.yaml create mode 100644 examples/speech_to_text/visualize.py diff --git a/examples/speech_to_text/output/config.yaml b/examples/speech_to_text/output/config.yaml new file mode 100644 index 00000000..76d90882 --- /dev/null +++ b/examples/speech_to_text/output/config.yaml @@ -0,0 +1,2 @@ +source_type: speech +target_type: speech diff --git a/examples/speech_to_text/visualize.py b/examples/speech_to_text/visualize.py new file mode 100644 index 00000000..627e5da9 --- /dev/null +++ b/examples/speech_to_text/visualize.py @@ -0,0 +1,74 @@ +import os +import pandas as pd +import argparse +import pprint + +def read_scores_from_folder(folder_path): + score_file_path = os.path.join(folder_path, 'scores.tsv') + if os.path.isfile(score_file_path): + with open(score_file_path, "r") as f: + contents = [line.strip() for line in f.read().split("\n") if line.strip()] + return contents + else: + return None + +def read_scores_files(output_folder): + all_contents = [] + + if not os.path.isdir(output_folder): + raise ValueError("Output folder does not exist") + + output_folder = os.path.abspath(output_folder) + + for folder in os.listdir(output_folder): + folder_path = os.path.join(output_folder, folder) + + if os.path.isdir(folder_path): + contents = read_scores_from_folder(folder_path) + if contents: + all_contents.append(contents) + return all_contents + +def process_result(output_folder, metric_names): + all_contents = read_scores_files(output_folder) + + # Extracting headers from the first line of each "scores.tsv" file + headers = [contents[0].split() for contents in all_contents if contents] + + if not headers: + raise ValueError("No headers found in the results") + reference_header = headers[0] + + if metric_names is None: + metric_names = reference_header + common_metrics = set(metric_names).intersection(reference_header) + + if not common_metrics: + raise ValueError("No common metrics found in the results") + + # Extracting scores for each metric + scores = [] + for contents in all_contents: + if contents: + values = dict(zip(contents[0].split(), contents[1].split())) + scores.append(values) + + df = pd.DataFrame(scores) + + df = df.fillna(0.0) + filtered_df = df[df.columns[df.columns.isin(common_metrics)]] + + if len(common_metrics) == 1: + metric_name = list(common_metrics)[0] + filtered_df = filtered_df[filtered_df[metric_name] != 0.0] + + return filtered_df + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--output", type=str, default=None, help="Output directory") + parser.add_argument("--metrics", type=str, nargs="+", default=None, help="Metrics to be extracted") + args = parser.parse_args() + + df = process_result(args.output, args.metrics) + pprint.pprint(df) \ No newline at end of file From 7d434d9ecffbbd44be7af6c1ad0141acb158dc2a Mon Sep 17 00:00:00 2001 From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com> Date: Fri, 17 Nov 2023 20:19:07 +0100 Subject: [PATCH 11/22] Ignore --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 18eee607..2c2f6d1a 100644 --- a/.gitignore +++ b/.gitignore @@ -139,4 +139,7 @@ cython_debug/ .vscode # Mac files -.DS_Store \ No newline at end of file +.DS_Store + +output +exp.ipynb \ No newline at end of file From 9d42a66e49a05eb75b3dbf9d3e8589dd6617738b Mon Sep 17 00:00:00 2001 From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com> Date: Wed, 22 Nov 2023 18:30:35 +0100 Subject: [PATCH 12/22] Lint with Black --- examples/speech_to_text/visualize.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/examples/speech_to_text/visualize.py b/examples/speech_to_text/visualize.py index 627e5da9..0491d32b 100644 --- a/examples/speech_to_text/visualize.py +++ b/examples/speech_to_text/visualize.py @@ -3,21 +3,23 @@ import argparse import pprint + def read_scores_from_folder(folder_path): - score_file_path = os.path.join(folder_path, 'scores.tsv') + score_file_path = os.path.join(folder_path, "scores.tsv") if os.path.isfile(score_file_path): with open(score_file_path, "r") as f: contents = [line.strip() for line in f.read().split("\n") if line.strip()] return contents else: return None - + + def read_scores_files(output_folder): all_contents = [] if not os.path.isdir(output_folder): raise ValueError("Output folder does not exist") - + output_folder = os.path.abspath(output_folder) for folder in os.listdir(output_folder): @@ -29,12 +31,13 @@ def read_scores_files(output_folder): all_contents.append(contents) return all_contents + def process_result(output_folder, metric_names): all_contents = read_scores_files(output_folder) - + # Extracting headers from the first line of each "scores.tsv" file headers = [contents[0].split() for contents in all_contents if contents] - + if not headers: raise ValueError("No headers found in the results") reference_header = headers[0] @@ -45,7 +48,7 @@ def process_result(output_folder, metric_names): if not common_metrics: raise ValueError("No common metrics found in the results") - + # Extracting scores for each metric scores = [] for contents in all_contents: @@ -64,11 +67,14 @@ def process_result(output_folder, metric_names): return filtered_df + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--output", type=str, default=None, help="Output directory") - parser.add_argument("--metrics", type=str, nargs="+", default=None, help="Metrics to be extracted") + parser.add_argument( + "--metrics", type=str, nargs="+", default=None, help="Metrics to be extracted" + ) args = parser.parse_args() df = process_result(args.output, args.metrics) - pprint.pprint(df) \ No newline at end of file + pprint.pprint(df) From fbe6b956e5f9d8548b11ffa7ddc3db28ef32566a Mon Sep 17 00:00:00 2001 From: Mubaraq Sani <55236862+SamDewriter@users.noreply.github.com> Date: Fri, 24 Nov 2023 16:44:34 +0100 Subject: [PATCH 13/22] Update examples/speech_to_text/visualize.py Add none to metric names Co-authored-by: Giancarlo Fissore --- examples/speech_to_text/visualize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/speech_to_text/visualize.py b/examples/speech_to_text/visualize.py index 0491d32b..4937d8a6 100644 --- a/examples/speech_to_text/visualize.py +++ b/examples/speech_to_text/visualize.py @@ -32,7 +32,7 @@ def read_scores_files(output_folder): return all_contents -def process_result(output_folder, metric_names): +def process_result(output_folder, metric_names=None): all_contents = read_scores_files(output_folder) # Extracting headers from the first line of each "scores.tsv" file From a58bb51fd590d37354aa085c89d0b07f81e75aa9 Mon Sep 17 00:00:00 2001 From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com> Date: Wed, 29 Nov 2023 10:10:16 +0100 Subject: [PATCH 14/22] Return error for files with no headers --- examples/speech_to_text/visualize.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/examples/speech_to_text/visualize.py b/examples/speech_to_text/visualize.py index 0491d32b..a2fc1e80 100644 --- a/examples/speech_to_text/visualize.py +++ b/examples/speech_to_text/visualize.py @@ -1,7 +1,7 @@ import os import pandas as pd import argparse -import pprint +from pprint import pprint def read_scores_from_folder(folder_path): @@ -29,18 +29,24 @@ def read_scores_files(output_folder): contents = read_scores_from_folder(folder_path) if contents: all_contents.append(contents) - return all_contents + + headers_list = [] + for contents in all_contents: + if contents: + header = contents[0].split() + if not header: + raise ValueError(f"Empty header in {contents}") + headers_list.append(header) + + return all_contents, headers_list -def process_result(output_folder, metric_names): - all_contents = read_scores_files(output_folder) +def process_result(output_folder, metric_names=None): + all_contents, headers_list = read_scores_files(output_folder) # Extracting headers from the first line of each "scores.tsv" file - headers = [contents[0].split() for contents in all_contents if contents] - if not headers: - raise ValueError("No headers found in the results") - reference_header = headers[0] + reference_header = headers_list[0] if metric_names is None: metric_names = reference_header @@ -58,7 +64,8 @@ def process_result(output_folder, metric_names): df = pd.DataFrame(scores) - df = df.fillna(0.0) + # Fill NaN values with NaN + df = df.fillna("NaN") filtered_df = df[df.columns[df.columns.isin(common_metrics)]] if len(common_metrics) == 1: @@ -77,4 +84,4 @@ def process_result(output_folder, metric_names): args = parser.parse_args() df = process_result(args.output, args.metrics) - pprint.pprint(df) + pprint(df) From 8505bf841ba1de492206675cc14890c6cbc052dd Mon Sep 17 00:00:00 2001 From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com> Date: Wed, 29 Nov 2023 10:14:36 +0100 Subject: [PATCH 15/22] Format with black --- examples/speech_to_text/visualize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/speech_to_text/visualize.py b/examples/speech_to_text/visualize.py index 77a6f83a..cfdabcc0 100644 --- a/examples/speech_to_text/visualize.py +++ b/examples/speech_to_text/visualize.py @@ -43,7 +43,7 @@ def read_scores_files(output_folder): def process_result(output_folder, metric_names): all_contents, headers_list = read_scores_files(output_folder) - + # Extracting headers from the first line of each "scores.tsv" file reference_header = headers_list[0] From 5fbd227efaa05a77f9617973b72c25fc6abfc56f Mon Sep 17 00:00:00 2001 From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com> Date: Wed, 29 Nov 2023 15:08:37 +0100 Subject: [PATCH 16/22] Add visualize argument --- simuleval/cli.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/simuleval/cli.py b/simuleval/cli.py index b54fe756..a3796a12 100644 --- a/simuleval/cli.py +++ b/simuleval/cli.py @@ -9,6 +9,7 @@ from simuleval import options from simuleval.utils.agent import build_system_args from simuleval.utils.slurm import submit_slurm_job +from simuleval.utils.visualize import process_result from simuleval.utils.arguments import check_argument from simuleval.utils import EVALUATION_SYSTEM_LIST from simuleval.evaluator import ( @@ -39,6 +40,10 @@ def main(): if check_argument("score_only"): scoring() return + + if check_argument("visualize"): + visualize() + return if check_argument("slurm"): submit_slurm_job() @@ -98,6 +103,12 @@ def remote_evaluate(): # evaluate system evaluator.remote_eval() +def visualize(): + parser = options.general_parser() + options.add_visualize_args(parser) + args = parser.parse_args() + visualizer = process_result(args.output, args.metrics) + print(visualizer) if __name__ == "__main__": main() From 7ca184d7b9752dc4357967ca56bea1fefd204953 Mon Sep 17 00:00:00 2001 From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com> Date: Wed, 29 Nov 2023 15:09:00 +0100 Subject: [PATCH 17/22] Add visualize argument --- simuleval/options.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/simuleval/options.py b/simuleval/options.py index 4c706e27..f65f29e8 100644 --- a/simuleval/options.py +++ b/simuleval/options.py @@ -185,10 +185,33 @@ def general_parser(): "--device", type=str, default="cpu", help="Device to run the model." ) parser.add_argument("--fp16", action="store_true", default=False, help="Use fp16.") + + parser.add_argument( + "--visualize", + action="store_true", + default=False, + help="Visualize the results.", + ) return parser + def add_slurm_args(parser): parser.add_argument("--slurm-partition", default="", help="Slurm partition.") parser.add_argument("--slurm-job-name", default="simuleval", help="Slurm job name.") parser.add_argument("--slurm-time", default="2:00:00", help="Slurm partition.") + +def add_visualize_args(parser): + parser.add_argument( + "--output", + type=str, + default=None, + help="Output directory", + ) + parser.add_argument( + "--metrics", + type=str, + nargs="+", + default=None, + help="Metrics to be extracted", + ) \ No newline at end of file From 23473800691f44941ce656ac860dfb8223258ef6 Mon Sep 17 00:00:00 2001 From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com> Date: Wed, 29 Nov 2023 15:09:37 +0100 Subject: [PATCH 18/22] Add visualization script to utils --- simuleval/utils/visualize.py | 77 ++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 simuleval/utils/visualize.py diff --git a/simuleval/utils/visualize.py b/simuleval/utils/visualize.py new file mode 100644 index 00000000..bc080571 --- /dev/null +++ b/simuleval/utils/visualize.py @@ -0,0 +1,77 @@ +import os +import pandas as pd +import re + +def read_scores_from_folder(folder_path, file_pattern=f"scores\.tsv$"): + file_pattern = re.compile(file_pattern) + + for file in os.listdir(folder_path): + if file_pattern.search(file): + score_file_path = os.path.join(folder_path, file) + # if os.path.isfile(score_file_path): + with open(score_file_path, "r") as f: + contents = [ + line.strip() for line in f.read().split("\n") if line.strip() + ] + return contents + return None + + +def read_scores_files(output_folder, file_pattern=f"scores\.tsv$"): + all_contents = [] + + if not os.path.isdir(output_folder): + raise ValueError("Output folder does not exist") + + output_folder = os.path.abspath(output_folder) + + for folder in os.listdir(output_folder): + folder_path = os.path.join(output_folder, folder) + + if os.path.isdir(folder_path): + contents = read_scores_from_folder(folder_path, file_pattern) + if contents: + all_contents.append(contents) + + headers_list = [] + for contents in all_contents: + if contents: + header = contents[0].split() + if not header: + raise ValueError(f"Empty header in {contents}") + headers_list.append(header) + + return all_contents, headers_list + + +def process_result(output_folder, metric_names): + all_contents, headers_list = read_scores_files(output_folder) + + # Extracting headers from the first line of each "scores.tsv" file + reference_header = headers_list[0] + + if metric_names is None: + metric_names = reference_header + common_metrics = set(metric_names).intersection(reference_header) + + if not common_metrics: + raise ValueError("No common metrics found in the results") + + # Extracting scores for each metric + scores = [] + for contents in all_contents: + if contents: + values = dict(zip(contents[0].split(), contents[1].split())) + scores.append(values) + + df = pd.DataFrame(scores) + + # Fill NaN values with NaN + df = df.fillna("NaN") + filtered_df = df[df.columns[df.columns.isin(common_metrics)]] + + if len(common_metrics) == 1: + metric_name = list(common_metrics)[0] + filtered_df = filtered_df[filtered_df[metric_name] != 0.0] + + return filtered_df From d3a86495849e4d661db2065c19763474f40aa939 Mon Sep 17 00:00:00 2001 From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com> Date: Wed, 29 Nov 2023 15:11:11 +0100 Subject: [PATCH 19/22] Remove commented code --- simuleval/utils/visualize.py | 1 - 1 file changed, 1 deletion(-) diff --git a/simuleval/utils/visualize.py b/simuleval/utils/visualize.py index bc080571..469df7cd 100644 --- a/simuleval/utils/visualize.py +++ b/simuleval/utils/visualize.py @@ -8,7 +8,6 @@ def read_scores_from_folder(folder_path, file_pattern=f"scores\.tsv$"): for file in os.listdir(folder_path): if file_pattern.search(file): score_file_path = os.path.join(folder_path, file) - # if os.path.isfile(score_file_path): with open(score_file_path, "r") as f: contents = [ line.strip() for line in f.read().split("\n") if line.strip() From 1d6c57bcdb94b638982cc35c70ae7b58ff7566b8 Mon Sep 17 00:00:00 2001 From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com> Date: Wed, 29 Nov 2023 15:12:13 +0100 Subject: [PATCH 20/22] Reformat with Black --- examples/speech_to_text/visualize.py | 26 ++++++++------ simuleval/cli.py | 4 ++- simuleval/evaluator/evaluator.py | 51 ++++++++++++++++++++++------ simuleval/options.py | 4 +-- simuleval/utils/visualize.py | 1 + 5 files changed, 63 insertions(+), 23 deletions(-) diff --git a/examples/speech_to_text/visualize.py b/examples/speech_to_text/visualize.py index cfdabcc0..92f59f3d 100644 --- a/examples/speech_to_text/visualize.py +++ b/examples/speech_to_text/visualize.py @@ -1,20 +1,26 @@ import os import pandas as pd +import re import argparse from pprint import pprint -def read_scores_from_folder(folder_path): - score_file_path = os.path.join(folder_path, "scores.tsv") - if os.path.isfile(score_file_path): - with open(score_file_path, "r") as f: - contents = [line.strip() for line in f.read().split("\n") if line.strip()] - return contents - else: - return None +def read_scores_from_folder(folder_path, file_pattern=f"scores\.tsv$"): + file_pattern = re.compile(file_pattern) + for file in os.listdir(folder_path): + if file_pattern.search(file): + score_file_path = os.path.join(folder_path, file) + # if os.path.isfile(score_file_path): + with open(score_file_path, "r") as f: + contents = [ + line.strip() for line in f.read().split("\n") if line.strip() + ] + return contents + return None -def read_scores_files(output_folder): + +def read_scores_files(output_folder, file_pattern=f"scores\.tsv$"): all_contents = [] if not os.path.isdir(output_folder): @@ -26,7 +32,7 @@ def read_scores_files(output_folder): folder_path = os.path.join(output_folder, folder) if os.path.isdir(folder_path): - contents = read_scores_from_folder(folder_path) + contents = read_scores_from_folder(folder_path, file_pattern) if contents: all_contents.append(contents) diff --git a/simuleval/cli.py b/simuleval/cli.py index a3796a12..fa0ec2f2 100644 --- a/simuleval/cli.py +++ b/simuleval/cli.py @@ -40,7 +40,7 @@ def main(): if check_argument("score_only"): scoring() return - + if check_argument("visualize"): visualize() return @@ -103,6 +103,7 @@ def remote_evaluate(): # evaluate system evaluator.remote_eval() + def visualize(): parser = options.general_parser() options.add_visualize_args(parser) @@ -110,5 +111,6 @@ def visualize(): visualizer = process_result(args.output, args.metrics) print(visualizer) + if __name__ == "__main__": main() diff --git a/simuleval/evaluator/evaluator.py b/simuleval/evaluator/evaluator.py index a0e7e598..b017bea7 100644 --- a/simuleval/evaluator/evaluator.py +++ b/simuleval/evaluator/evaluator.py @@ -7,6 +7,7 @@ import pandas import os import numbers +import datetime from argparse import Namespace from typing import Dict, Generator, Optional from .scorers import get_scorer_class @@ -213,19 +214,48 @@ def results(self): df = pandas.DataFrame(new_scores) return df - def dump_results(self) -> None: + def create_output_dir(self) -> Path: + timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + output_directory = self.output / f"run-{timestamp}" + output_directory.mkdir(exist_ok=True, parents=True) + return output_directory + + def dump_results_and_metrics(self) -> None: results = self.results - if self.output: - results.to_csv(self.output / "scores.tsv", sep="\t", index=False) + metrics = pandas.DataFrame([ins.metrics for ins in self.instances.values()]) + metrics = metrics.round(3) + + output_folder = self.create_output_dir() + + results_filename = "scores.tsv" + metrics_filename = "metrics.tsv" + + results.to_csv(output_folder / results_filename, sep="\t", index=False) + metrics.to_csv(output_folder / metrics_filename, sep="\t", index=False) logger.info("Results:") print(results.to_string(index=False)) - def dump_metrics(self) -> None: - metrics = pandas.DataFrame([ins.metrics for ins in self.instances.values()]) - metrics = metrics.round(3) - if self.output: - metrics.to_csv(self.output / "metrics.tsv", sep="\t", index=False) + # def dump_results(self) -> None: + # results = self.results + + # timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + # filename = f"results-{timestamp}.tsv" + + # output_directory = self.output or Path(".") + + # if self.output: + # output_directory = os.path.join(output_directory, filename) + # results.to_csv(os.path.join(self.output, filename), sep="\t", index=False) + + # logger.info("Results:") + # print(results.to_string(index=False)) + + # def dump_metrics(self) -> None: + # metrics = pandas.DataFrame([ins.metrics for ins in self.instances.values()]) + # metrics = metrics.round(3) + # if self.output: + # metrics.to_csv(self.output / "metrics.tsv", sep="\t", index=False) def is_finished(self, instance) -> bool: if hasattr(instance, "source_finished_reading"): @@ -250,8 +280,9 @@ def __call__(self, system): if not self.score_only: self.write_log(instance) - self.dump_results() - self.dump_metrics() + # self.dump_results() + # self.dump_metrics() + self.dump_results_and_metrics() @classmethod def from_args(cls, args): diff --git a/simuleval/options.py b/simuleval/options.py index f65f29e8..27e259ea 100644 --- a/simuleval/options.py +++ b/simuleval/options.py @@ -195,12 +195,12 @@ def general_parser(): return parser - def add_slurm_args(parser): parser.add_argument("--slurm-partition", default="", help="Slurm partition.") parser.add_argument("--slurm-job-name", default="simuleval", help="Slurm job name.") parser.add_argument("--slurm-time", default="2:00:00", help="Slurm partition.") + def add_visualize_args(parser): parser.add_argument( "--output", @@ -214,4 +214,4 @@ def add_visualize_args(parser): nargs="+", default=None, help="Metrics to be extracted", - ) \ No newline at end of file + ) diff --git a/simuleval/utils/visualize.py b/simuleval/utils/visualize.py index 469df7cd..ee4b6330 100644 --- a/simuleval/utils/visualize.py +++ b/simuleval/utils/visualize.py @@ -2,6 +2,7 @@ import pandas as pd import re + def read_scores_from_folder(folder_path, file_pattern=f"scores\.tsv$"): file_pattern = re.compile(file_pattern) From 38f8cb01b91d96708f846528b9940dc5a0c6d323 Mon Sep 17 00:00:00 2001 From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com> Date: Wed, 29 Nov 2023 15:15:33 +0100 Subject: [PATCH 21/22] Remove circle --- .circleci/config.yml | 34 ---------------------------------- 1 file changed, 34 deletions(-) delete mode 100644 .circleci/config.yml diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index f0d9287b..00000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: TestPyPI Publish - -on: - push: - branches: - - main - -jobs: - test_pypi_publish: - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v2 - - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: 3.x - - - name: Install dependencies - run: | - sudo apt-get update - sudo apt-get install -y python3-pip - pip install pipenv - pipenv install twine - - - name: Build and Publish - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.TEST_PYPI_TOKEN }} - REPOSITORY_URL: https://test.pypi.org/legacy/ - run: | - python setup.py sdist bdist_wheel - pipenv run twine upload --repository-url $REPOSITORY_URL dist/* From 82fa90c056e9e31d790629822d8b8eaa6a0ed5b7 Mon Sep 17 00:00:00 2001 From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com> Date: Wed, 29 Nov 2023 15:29:13 +0100 Subject: [PATCH 22/22] Revert back to initial --- simuleval/evaluator/evaluator.py | 45 +++++++++----------------------- 1 file changed, 12 insertions(+), 33 deletions(-) diff --git a/simuleval/evaluator/evaluator.py b/simuleval/evaluator/evaluator.py index b017bea7..1e84db1a 100644 --- a/simuleval/evaluator/evaluator.py +++ b/simuleval/evaluator/evaluator.py @@ -220,42 +220,22 @@ def create_output_dir(self) -> Path: output_directory.mkdir(exist_ok=True, parents=True) return output_directory - def dump_results_and_metrics(self) -> None: + def dump_results(self) -> None: results = self.results - metrics = pandas.DataFrame([ins.metrics for ins in self.instances.values()]) - metrics = metrics.round(3) - - output_folder = self.create_output_dir() - - results_filename = "scores.tsv" - metrics_filename = "metrics.tsv" - - results.to_csv(output_folder / results_filename, sep="\t", index=False) - metrics.to_csv(output_folder / metrics_filename, sep="\t", index=False) + if self.output: + results.to_csv(self.output / "scores.tsv", sep="\t", index=False) logger.info("Results:") print(results.to_string(index=False)) - # def dump_results(self) -> None: - # results = self.results - - # timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") - # filename = f"results-{timestamp}.tsv" - - # output_directory = self.output or Path(".") - - # if self.output: - # output_directory = os.path.join(output_directory, filename) - # results.to_csv(os.path.join(self.output, filename), sep="\t", index=False) - - # logger.info("Results:") - # print(results.to_string(index=False)) + logger.info("Results:") + print(results.to_string(index=False)) - # def dump_metrics(self) -> None: - # metrics = pandas.DataFrame([ins.metrics for ins in self.instances.values()]) - # metrics = metrics.round(3) - # if self.output: - # metrics.to_csv(self.output / "metrics.tsv", sep="\t", index=False) + def dump_metrics(self) -> None: + metrics = pandas.DataFrame([ins.metrics for ins in self.instances.values()]) + metrics = metrics.round(3) + if self.output: + metrics.to_csv(self.output / "metrics.tsv", sep="\t", index=False) def is_finished(self, instance) -> bool: if hasattr(instance, "source_finished_reading"): @@ -280,9 +260,8 @@ def __call__(self, system): if not self.score_only: self.write_log(instance) - # self.dump_results() - # self.dump_metrics() - self.dump_results_and_metrics() + self.dump_results() + self.dump_metrics() @classmethod def from_args(cls, args):