From 7dddb935b43b655c77694417d1e6c232a3ca09e8 Mon Sep 17 00:00:00 2001
From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com>
Date: Wed, 19 Jul 2023 21:28:51 +0100
Subject: [PATCH 01/22] Testing CircleCI on main

---
 .circleci/config.yml | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 .circleci/config.yml

diff --git a/.circleci/config.yml b/.circleci/config.yml
new file mode 100644
index 00000000..3ea33ffa
--- /dev/null
+++ b/.circleci/config.yml
@@ -0,0 +1,43 @@
+version: 2.1
+jobs:
+  # build_test:
+  #   docker:
+  #     - image: cimg/python:3.11.0
+  #     - run:
+  #         name: Install dependencies
+  #         command: | # create whl and install dependencies
+  #           python3 setup.py sdist bdist_wheel
+  #           sudo add-apt-repository universe -y
+  #           sudo apt-get update
+  #           sudo apt install -y python3-pip
+  #           sudo pip install pipenv
+  #           sudo apt-get install -y python3-pip
+  #           sudo pip3 install pytest
+  #     - run:
+  #         name: Run tests
+  #         command: |
+  #           pytest
+  test_pypi_publish:
+    docker:
+      - image: cimg/python:3.11.0
+    steps:
+      - checkout
+      - run:
+        command: |
+          python3 setup.py sdist bdist_wheel
+          sudo add-apt-repository universe -y
+          sudo apt-get update
+          sudo apt-get install -y python3-pip
+          sudo pip install pipenv
+          pipenv install twine
+          pipenv run twine upload --repository-url testpypi dist/*
+workflows:
+  version: 2
+  build_test_publish:
+    jobs:
+      - test_pypi_publish:
+          # requires:
+          #   - build_test
+          filters:
+            branches:
+              only: main

From 15275912e8c03c8453744b833223b838e098cfdf Mon Sep 17 00:00:00 2001
From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com>
Date: Wed, 19 Jul 2023 21:39:58 +0100
Subject: [PATCH 02/22] Testing CircleCI on main

---
 .circleci/config.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 3ea33ffa..a3250afa 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -18,10 +18,13 @@ jobs:
   #         command: |
   #           pytest
   test_pypi_publish:
+    runs-on: ubuntu-latest
     docker:
       - image: cimg/python:3.11.0
     steps:
-      - checkout
+      - name: Checkout code
+        uses: actions/checkout@v2
+      - name: Build and publish to TestPyPI
       - run:
         command: |
           python3 setup.py sdist bdist_wheel

From e31714bedd1effec7feb04a91dcf6d6ce03015c3 Mon Sep 17 00:00:00 2001
From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com>
Date: Wed, 19 Jul 2023 21:47:25 +0100
Subject: [PATCH 03/22] Testing CircleCI on main

---
 .circleci/config.yml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index a3250afa..e7981e01 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -19,14 +19,11 @@ jobs:
   #           pytest
   test_pypi_publish:
     runs-on: ubuntu-latest
-    docker:
-      - image: cimg/python:3.11.0
     steps:
       - name: Checkout code
         uses: actions/checkout@v2
       - name: Build and publish to TestPyPI
       - run:
-        command: |
           python3 setup.py sdist bdist_wheel
           sudo add-apt-repository universe -y
           sudo apt-get update

From a5a007efa8cb062bcfbd160942dee8a35914d89b Mon Sep 17 00:00:00 2001
From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com>
Date: Wed, 19 Jul 2023 21:51:40 +0100
Subject: [PATCH 04/22] Testing CircleCI on main

---
 .circleci/config.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index e7981e01..0fd6de8c 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -23,6 +23,10 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@v2
       - name: Build and publish to TestPyPI
+      - with:
+          username: __token__
+          password: ${{ secrets.TEST_PYPI_TOKEN }}
+          repository_url: https://test.pypi.org/legacy/
       - run:
           python3 setup.py sdist bdist_wheel
           sudo add-apt-repository universe -y

From d5890da445ce89c8fb70f31f6fdd3d2cb66585ed Mon Sep 17 00:00:00 2001
From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com>
Date: Wed, 19 Jul 2023 22:00:33 +0100
Subject: [PATCH 05/22] Testing CircleCI on main

---
 .circleci/config.yml | 24 +++---------------------
 1 file changed, 3 insertions(+), 21 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 0fd6de8c..96879761 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1,31 +1,13 @@
 version: 2.1
 jobs:
-  # build_test:
-  #   docker:
-  #     - image: cimg/python:3.11.0
-  #     - run:
-  #         name: Install dependencies
-  #         command: | # create whl and install dependencies
-  #           python3 setup.py sdist bdist_wheel
-  #           sudo add-apt-repository universe -y
-  #           sudo apt-get update
-  #           sudo apt install -y python3-pip
-  #           sudo pip install pipenv
-  #           sudo apt-get install -y python3-pip
-  #           sudo pip3 install pytest
-  #     - run:
-  #         name: Run tests
-  #         command: |
-  #           pytest
-  test_pypi_publish:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
         uses: actions/checkout@v2
       - name: Build and publish to TestPyPI
-      - with:
-          username: __token__
-          password: ${{ secrets.TEST_PYPI_TOKEN }}
+        env:
+          TWINE_USERNAME: __token__
+          TWINE_PASSWORD: ${{ secrets.TEST_PYPI_TOKEN }}
           repository_url: https://test.pypi.org/legacy/
       - run:
           python3 setup.py sdist bdist_wheel

From 54936d237074b3905b95ca3e96a22178853a28fc Mon Sep 17 00:00:00 2001
From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com>
Date: Thu, 20 Jul 2023 09:03:20 +0100
Subject: [PATCH 06/22] correct Circle config

---
 .circleci/config.yml | 46 ++++++++++++++++++++++++--------------------
 1 file changed, 25 insertions(+), 21 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 96879761..fe2afa3d 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1,29 +1,33 @@
-version: 2.1
+name: Build and Publish to TestPyPI
+on:
+  push:
+    branches:
+      - main
+
 jobs:
+  build_test_publish:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
         uses: actions/checkout@v2
-      - name: Build and publish to TestPyPI
-        env:
-          TWINE_USERNAME: __token__
-          TWINE_PASSWORD: ${{ secrets.TEST_PYPI_TOKEN }}
-          repository_url: https://test.pypi.org/legacy/
-      - run:
-          python3 setup.py sdist bdist_wheel
-          sudo add-apt-repository universe -y
+
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.x
+
+      - name: Install dependencies
+        run: |
           sudo apt-get update
           sudo apt-get install -y python3-pip
-          sudo pip install pipenv
+          pip install pipenv
           pipenv install twine
-          pipenv run twine upload --repository-url testpypi dist/*
-workflows:
-  version: 2
-  build_test_publish:
-    jobs:
-      - test_pypi_publish:
-          # requires:
-          #   - build_test
-          filters:
-            branches:
-              only: main
+
+      - name: Build and Publish
+        env:
+          TWINE_USERNAME: __token__
+          TWINE_PASSWORD: ${{ secrets.TEST_PYPI_TOKEN }}
+          REPOSITORY_URL: https://test.pypi.org/legacy/
+        run: |
+          python setup.py sdist bdist_wheel
+          pipenv run twine upload --repository-url $REPOSITORY_URL dist/*

From 2352d22bc1ef4a19ae2ef43187205f90b784b190 Mon Sep 17 00:00:00 2001
From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com>
Date: Thu, 20 Jul 2023 09:08:16 +0100
Subject: [PATCH 07/22] correct Circle config

---
 .circleci/config.yml | 45 +++++++++++++-------------------------------
 1 file changed, 13 insertions(+), 32 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index fe2afa3d..e89e805a 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1,33 +1,14 @@
-name: Build and Publish to TestPyPI
-on:
-  push:
-    branches:
-      - main
-
-jobs:
-  build_test_publish:
-    runs-on: ubuntu-latest
+test_pypi_publish:
+    docker:
+      - image: cimg/python:3.11.0
     steps:
-      - name: Checkout code
-        uses: actions/checkout@v2
-
-      - name: Set up Python
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.x
-
-      - name: Install dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y python3-pip
-          pip install pipenv
-          pipenv install twine
-
-      - name: Build and Publish
-        env:
-          TWINE_USERNAME: __token__
-          TWINE_PASSWORD: ${{ secrets.TEST_PYPI_TOKEN }}
-          REPOSITORY_URL: https://test.pypi.org/legacy/
-        run: |
-          python setup.py sdist bdist_wheel
-          pipenv run twine upload --repository-url $REPOSITORY_URL dist/*
+      - checkout # checkout source code to working directory
+      - run:
+          command: | # create whl, install twine and publish to Test PyPI
+            python3 setup.py sdist bdist_wheel
+            sudo add-apt-repository universe -y
+            sudo apt-get update
+            sudo apt install -y python3-pip
+            sudo pip install pipenv
+            pipenv install twine
+            pipenv run twine upload --repository testpypi dist/*
\ No newline at end of file

From 2ea30a9c54e4d0b6fdb0383ad9939d4895238bb0 Mon Sep 17 00:00:00 2001
From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com>
Date: Thu, 20 Jul 2023 09:13:06 +0100
Subject: [PATCH 08/22] correct Circle config

---
 .circleci/config.yml | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index e89e805a..2ec034d6 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1,14 +1,14 @@
 test_pypi_publish:
-    docker:
-      - image: cimg/python:3.11.0
-    steps:
-      - checkout # checkout source code to working directory
-      - run:
-          command: | # create whl, install twine and publish to Test PyPI
-            python3 setup.py sdist bdist_wheel
-            sudo add-apt-repository universe -y
-            sudo apt-get update
-            sudo apt install -y python3-pip
-            sudo pip install pipenv
-            pipenv install twine
-            pipenv run twine upload --repository testpypi dist/*
\ No newline at end of file
+  docker:
+    - image: cimg/python:3.11.0
+  steps:
+    - checkout
+    - run: 
+        command: |
+          python3 setup.py sdist bdist_wheel
+          sudo add-apt-repository universe -y
+          sudo apt-get update
+          sudo apt install -y python3-pip
+          sudo pip install pipenv
+          pipenv install twine
+          pipenv run twine upload --repository testpypi dist/*
\ No newline at end of file

From 4377457d8fe31812c141ed71efd32ccbdf27ea98 Mon Sep 17 00:00:00 2001
From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com>
Date: Thu, 20 Jul 2023 09:14:23 +0100
Subject: [PATCH 09/22] correct Circle config

---
 .circleci/config.yml | 44 ++++++++++++++++++++++++++++++++------------
 1 file changed, 32 insertions(+), 12 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 2ec034d6..f0d9287b 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1,14 +1,34 @@
-test_pypi_publish:
-  docker:
-    - image: cimg/python:3.11.0
-  steps:
-    - checkout
-    - run: 
-        command: |
-          python3 setup.py sdist bdist_wheel
-          sudo add-apt-repository universe -y
+name: TestPyPI Publish
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  test_pypi_publish:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.x
+
+      - name: Install dependencies
+        run: |
           sudo apt-get update
-          sudo apt install -y python3-pip
-          sudo pip install pipenv
+          sudo apt-get install -y python3-pip
+          pip install pipenv
           pipenv install twine
-          pipenv run twine upload --repository testpypi dist/*
\ No newline at end of file
+
+      - name: Build and Publish
+        env:
+          TWINE_USERNAME: __token__
+          TWINE_PASSWORD: ${{ secrets.TEST_PYPI_TOKEN }}
+          REPOSITORY_URL: https://test.pypi.org/legacy/
+        run: |
+          python setup.py sdist bdist_wheel
+          pipenv run twine upload --repository-url $REPOSITORY_URL dist/*

From 6dd12b9236a07fb382f6af029740651d42c78c9e Mon Sep 17 00:00:00 2001
From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com>
Date: Fri, 17 Nov 2023 20:18:36 +0100
Subject: [PATCH 10/22] Add visualize functionality

---
 examples/speech_to_text/output/config.yaml |  2 +
 examples/speech_to_text/visualize.py       | 74 ++++++++++++++++++++++
 2 files changed, 76 insertions(+)
 create mode 100644 examples/speech_to_text/output/config.yaml
 create mode 100644 examples/speech_to_text/visualize.py

diff --git a/examples/speech_to_text/output/config.yaml b/examples/speech_to_text/output/config.yaml
new file mode 100644
index 00000000..76d90882
--- /dev/null
+++ b/examples/speech_to_text/output/config.yaml
@@ -0,0 +1,2 @@
+source_type: speech
+target_type: speech
diff --git a/examples/speech_to_text/visualize.py b/examples/speech_to_text/visualize.py
new file mode 100644
index 00000000..627e5da9
--- /dev/null
+++ b/examples/speech_to_text/visualize.py
@@ -0,0 +1,74 @@
+import os
+import pandas as pd
+import argparse
+import pprint
+
+def read_scores_from_folder(folder_path):
+    score_file_path = os.path.join(folder_path, 'scores.tsv')
+    if os.path.isfile(score_file_path):
+        with open(score_file_path, "r") as f:
+            contents = [line.strip() for line in f.read().split("\n") if line.strip()]
+        return contents
+    else:
+        return None
+    
+def read_scores_files(output_folder):
+    all_contents = []
+
+    if not os.path.isdir(output_folder):
+        raise ValueError("Output folder does not exist")
+    
+    output_folder = os.path.abspath(output_folder)
+
+    for folder in os.listdir(output_folder):
+        folder_path = os.path.join(output_folder, folder)
+
+        if os.path.isdir(folder_path):
+            contents = read_scores_from_folder(folder_path)
+            if contents:
+                all_contents.append(contents)
+    return all_contents
+
+def process_result(output_folder, metric_names):
+    all_contents = read_scores_files(output_folder)
+    
+    # Extracting headers from the first line of each "scores.tsv" file
+    headers = [contents[0].split() for contents in all_contents if contents]
+    
+    if not headers:
+        raise ValueError("No headers found in the results")
+    reference_header = headers[0]
+
+    if metric_names is None:
+        metric_names = reference_header
+    common_metrics = set(metric_names).intersection(reference_header)
+
+    if not common_metrics:
+        raise ValueError("No common metrics found in the results")
+    
+    # Extracting scores for each metric
+    scores = []
+    for contents in all_contents:
+        if contents:
+            values = dict(zip(contents[0].split(), contents[1].split()))
+            scores.append(values)
+
+    df = pd.DataFrame(scores)
+
+    df = df.fillna(0.0)
+    filtered_df = df[df.columns[df.columns.isin(common_metrics)]]
+
+    if len(common_metrics) == 1:
+        metric_name = list(common_metrics)[0]
+        filtered_df = filtered_df[filtered_df[metric_name] != 0.0]
+
+    return filtered_df
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--output", type=str, default=None, help="Output directory")
+    parser.add_argument("--metrics", type=str, nargs="+", default=None, help="Metrics to be extracted")
+    args = parser.parse_args()
+
+    df = process_result(args.output, args.metrics)
+    pprint.pprint(df)
\ No newline at end of file

From 7d434d9ecffbbd44be7af6c1ad0141acb158dc2a Mon Sep 17 00:00:00 2001
From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com>
Date: Fri, 17 Nov 2023 20:19:07 +0100
Subject: [PATCH 11/22] Ignore output and exp.ipynb in .gitignore

---
 .gitignore | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 18eee607..2c2f6d1a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -139,4 +139,7 @@ cython_debug/
 .vscode
 
 # Mac files
-.DS_Store
\ No newline at end of file
+.DS_Store
+
+output
+exp.ipynb
\ No newline at end of file

From 9d42a66e49a05eb75b3dbf9d3e8589dd6617738b Mon Sep 17 00:00:00 2001
From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com>
Date: Wed, 22 Nov 2023 18:30:35 +0100
Subject: [PATCH 12/22] Lint with Black

---
 examples/speech_to_text/visualize.py | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/examples/speech_to_text/visualize.py b/examples/speech_to_text/visualize.py
index 627e5da9..0491d32b 100644
--- a/examples/speech_to_text/visualize.py
+++ b/examples/speech_to_text/visualize.py
@@ -3,21 +3,23 @@
 import argparse
 import pprint
 
+
 def read_scores_from_folder(folder_path):
-    score_file_path = os.path.join(folder_path, 'scores.tsv')
+    score_file_path = os.path.join(folder_path, "scores.tsv")
     if os.path.isfile(score_file_path):
         with open(score_file_path, "r") as f:
             contents = [line.strip() for line in f.read().split("\n") if line.strip()]
         return contents
     else:
         return None
-    
+
+
 def read_scores_files(output_folder):
     all_contents = []
 
     if not os.path.isdir(output_folder):
         raise ValueError("Output folder does not exist")
-    
+
     output_folder = os.path.abspath(output_folder)
 
     for folder in os.listdir(output_folder):
@@ -29,12 +31,13 @@ def read_scores_files(output_folder):
                 all_contents.append(contents)
     return all_contents
 
+
 def process_result(output_folder, metric_names):
     all_contents = read_scores_files(output_folder)
-    
+
     # Extracting headers from the first line of each "scores.tsv" file
     headers = [contents[0].split() for contents in all_contents if contents]
-    
+
     if not headers:
         raise ValueError("No headers found in the results")
     reference_header = headers[0]
@@ -45,7 +48,7 @@ def process_result(output_folder, metric_names):
 
     if not common_metrics:
         raise ValueError("No common metrics found in the results")
-    
+
     # Extracting scores for each metric
     scores = []
     for contents in all_contents:
@@ -64,11 +67,14 @@ def process_result(output_folder, metric_names):
 
     return filtered_df
 
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--output", type=str, default=None, help="Output directory")
-    parser.add_argument("--metrics", type=str, nargs="+", default=None, help="Metrics to be extracted")
+    parser.add_argument(
+        "--metrics", type=str, nargs="+", default=None, help="Metrics to be extracted"
+    )
     args = parser.parse_args()
 
     df = process_result(args.output, args.metrics)
-    pprint.pprint(df)
\ No newline at end of file
+    pprint.pprint(df)

From fbe6b956e5f9d8548b11ffa7ddc3db28ef32566a Mon Sep 17 00:00:00 2001
From: Mubaraq Sani <55236862+SamDewriter@users.noreply.github.com>
Date: Fri, 24 Nov 2023 16:44:34 +0100
Subject: [PATCH 13/22] Update examples/speech_to_text/visualize.py

Make the metric_names parameter of process_result default to None

Co-authored-by: Giancarlo Fissore <giancarlo.fissore@gmail.com>
---
 examples/speech_to_text/visualize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/speech_to_text/visualize.py b/examples/speech_to_text/visualize.py
index 0491d32b..4937d8a6 100644
--- a/examples/speech_to_text/visualize.py
+++ b/examples/speech_to_text/visualize.py
@@ -32,7 +32,7 @@ def read_scores_files(output_folder):
     return all_contents
 
 
-def process_result(output_folder, metric_names):
+def process_result(output_folder, metric_names=None):
     all_contents = read_scores_files(output_folder)
 
     # Extracting headers from the first line of each "scores.tsv" file

From a58bb51fd590d37354aa085c89d0b07f81e75aa9 Mon Sep 17 00:00:00 2001
From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com>
Date: Wed, 29 Nov 2023 10:10:16 +0100
Subject: [PATCH 14/22] Return error for files with no headers

---
 examples/speech_to_text/visualize.py | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/examples/speech_to_text/visualize.py b/examples/speech_to_text/visualize.py
index 0491d32b..a2fc1e80 100644
--- a/examples/speech_to_text/visualize.py
+++ b/examples/speech_to_text/visualize.py
@@ -1,7 +1,7 @@
 import os
 import pandas as pd
 import argparse
-import pprint
+from pprint import pprint
 
 
 def read_scores_from_folder(folder_path):
@@ -29,18 +29,24 @@ def read_scores_files(output_folder):
             contents = read_scores_from_folder(folder_path)
             if contents:
                 all_contents.append(contents)
-    return all_contents
+
+    headers_list = []
+    for contents in all_contents:
+        if contents:
+            header = contents[0].split()
+            if not header:
+                raise ValueError(f"Empty header in {contents}")
+            headers_list.append(header)
+
+    return all_contents, headers_list
 
 
-def process_result(output_folder, metric_names):
-    all_contents = read_scores_files(output_folder)
+def process_result(output_folder, metric_names=None):
+    all_contents, headers_list = read_scores_files(output_folder)
 
     # Extracting headers from the first line of each "scores.tsv" file
-    headers = [contents[0].split() for contents in all_contents if contents]
 
-    if not headers:
-        raise ValueError("No headers found in the results")
-    reference_header = headers[0]
+    reference_header = headers_list[0]
 
     if metric_names is None:
         metric_names = reference_header
@@ -58,7 +64,8 @@ def process_result(output_folder, metric_names):
 
     df = pd.DataFrame(scores)
 
-    df = df.fillna(0.0)
+    # Fill NaN values with NaN
+    df = df.fillna("NaN")
     filtered_df = df[df.columns[df.columns.isin(common_metrics)]]
 
     if len(common_metrics) == 1:
@@ -77,4 +84,4 @@ def process_result(output_folder, metric_names):
     args = parser.parse_args()
 
     df = process_result(args.output, args.metrics)
-    pprint.pprint(df)
+    pprint(df)

From 8505bf841ba1de492206675cc14890c6cbc052dd Mon Sep 17 00:00:00 2001
From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com>
Date: Wed, 29 Nov 2023 10:14:36 +0100
Subject: [PATCH 15/22] Format with black

---
 examples/speech_to_text/visualize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/speech_to_text/visualize.py b/examples/speech_to_text/visualize.py
index 77a6f83a..cfdabcc0 100644
--- a/examples/speech_to_text/visualize.py
+++ b/examples/speech_to_text/visualize.py
@@ -43,7 +43,7 @@ def read_scores_files(output_folder):
 
 def process_result(output_folder, metric_names):
     all_contents, headers_list = read_scores_files(output_folder)
-    
+
     # Extracting headers from the first line of each "scores.tsv" file
     reference_header = headers_list[0]
 

From 5fbd227efaa05a77f9617973b72c25fc6abfc56f Mon Sep 17 00:00:00 2001
From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com>
Date: Wed, 29 Nov 2023 15:08:37 +0100
Subject: [PATCH 16/22] Add visualize entry point to the CLI

---
 simuleval/cli.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/simuleval/cli.py b/simuleval/cli.py
index b54fe756..a3796a12 100644
--- a/simuleval/cli.py
+++ b/simuleval/cli.py
@@ -9,6 +9,7 @@
 from simuleval import options
 from simuleval.utils.agent import build_system_args
 from simuleval.utils.slurm import submit_slurm_job
+from simuleval.utils.visualize import process_result
 from simuleval.utils.arguments import check_argument
 from simuleval.utils import EVALUATION_SYSTEM_LIST
 from simuleval.evaluator import (
@@ -39,6 +40,10 @@ def main():
     if check_argument("score_only"):
         scoring()
         return
+    
+    if check_argument("visualize"):
+        visualize()
+        return
 
     if check_argument("slurm"):
         submit_slurm_job()
@@ -98,6 +103,12 @@ def remote_evaluate():
     # evaluate system
     evaluator.remote_eval()
 
+def visualize():
+    parser = options.general_parser()
+    options.add_visualize_args(parser)
+    args = parser.parse_args()
+    visualizer = process_result(args.output, args.metrics)
+    print(visualizer)
 
 if __name__ == "__main__":
     main()

From 7ca184d7b9752dc4357967ca56bea1fefd204953 Mon Sep 17 00:00:00 2001
From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com>
Date: Wed, 29 Nov 2023 15:09:00 +0100
Subject: [PATCH 17/22] Add --visualize flag and visualize options

---
 simuleval/options.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/simuleval/options.py b/simuleval/options.py
index 4c706e27..f65f29e8 100644
--- a/simuleval/options.py
+++ b/simuleval/options.py
@@ -185,10 +185,33 @@ def general_parser():
         "--device", type=str, default="cpu", help="Device to run the model."
     )
     parser.add_argument("--fp16", action="store_true", default=False, help="Use fp16.")
+
+    parser.add_argument(
+        "--visualize",
+        action="store_true",
+        default=False,
+        help="Visualize the results.",
+    )
     return parser
 
 
+
 def add_slurm_args(parser):
     parser.add_argument("--slurm-partition", default="", help="Slurm partition.")
     parser.add_argument("--slurm-job-name", default="simuleval", help="Slurm job name.")
     parser.add_argument("--slurm-time", default="2:00:00", help="Slurm partition.")
+
+def add_visualize_args(parser):
+    parser.add_argument(
+        "--output",
+        type=str,
+        default=None,
+        help="Output directory",
+    )
+    parser.add_argument(
+        "--metrics",
+        type=str,
+        nargs="+",
+        default=None,
+        help="Metrics to be extracted",
+    )
\ No newline at end of file

From 23473800691f44941ce656ac860dfb8223258ef6 Mon Sep 17 00:00:00 2001
From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com>
Date: Wed, 29 Nov 2023 15:09:37 +0100
Subject: [PATCH 18/22] Add visualization script to utils

---
 simuleval/utils/visualize.py | 77 ++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 simuleval/utils/visualize.py

diff --git a/simuleval/utils/visualize.py b/simuleval/utils/visualize.py
new file mode 100644
index 00000000..bc080571
--- /dev/null
+++ b/simuleval/utils/visualize.py
@@ -0,0 +1,77 @@
+import os
+import pandas as pd
+import re
+
+def read_scores_from_folder(folder_path, file_pattern=f"scores\.tsv$"):
+    file_pattern = re.compile(file_pattern)
+
+    for file in os.listdir(folder_path):
+        if file_pattern.search(file):
+            score_file_path = os.path.join(folder_path, file)
+            # if os.path.isfile(score_file_path):
+            with open(score_file_path, "r") as f:
+                contents = [
+                    line.strip() for line in f.read().split("\n") if line.strip()
+                ]
+            return contents
+    return None
+
+
+def read_scores_files(output_folder, file_pattern=f"scores\.tsv$"):
+    all_contents = []
+
+    if not os.path.isdir(output_folder):
+        raise ValueError("Output folder does not exist")
+
+    output_folder = os.path.abspath(output_folder)
+
+    for folder in os.listdir(output_folder):
+        folder_path = os.path.join(output_folder, folder)
+
+        if os.path.isdir(folder_path):
+            contents = read_scores_from_folder(folder_path, file_pattern)
+            if contents:
+                all_contents.append(contents)
+
+    headers_list = []
+    for contents in all_contents:
+        if contents:
+            header = contents[0].split()
+            if not header:
+                raise ValueError(f"Empty header in {contents}")
+            headers_list.append(header)
+
+    return all_contents, headers_list
+
+
+def process_result(output_folder, metric_names):
+    all_contents, headers_list = read_scores_files(output_folder)
+
+    # Extracting headers from the first line of each "scores.tsv" file
+    reference_header = headers_list[0]
+
+    if metric_names is None:
+        metric_names = reference_header
+    common_metrics = set(metric_names).intersection(reference_header)
+
+    if not common_metrics:
+        raise ValueError("No common metrics found in the results")
+
+    # Extracting scores for each metric
+    scores = []
+    for contents in all_contents:
+        if contents:
+            values = dict(zip(contents[0].split(), contents[1].split()))
+            scores.append(values)
+
+    df = pd.DataFrame(scores)
+
+    # Fill NaN values with NaN
+    df = df.fillna("NaN")
+    filtered_df = df[df.columns[df.columns.isin(common_metrics)]]
+
+    if len(common_metrics) == 1:
+        metric_name = list(common_metrics)[0]
+        filtered_df = filtered_df[filtered_df[metric_name] != 0.0]
+
+    return filtered_df

From d3a86495849e4d661db2065c19763474f40aa939 Mon Sep 17 00:00:00 2001
From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com>
Date: Wed, 29 Nov 2023 15:11:11 +0100
Subject: [PATCH 19/22] Remove commented code

---
 simuleval/utils/visualize.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/simuleval/utils/visualize.py b/simuleval/utils/visualize.py
index bc080571..469df7cd 100644
--- a/simuleval/utils/visualize.py
+++ b/simuleval/utils/visualize.py
@@ -8,7 +8,6 @@ def read_scores_from_folder(folder_path, file_pattern=f"scores\.tsv$"):
     for file in os.listdir(folder_path):
         if file_pattern.search(file):
             score_file_path = os.path.join(folder_path, file)
-            # if os.path.isfile(score_file_path):
             with open(score_file_path, "r") as f:
                 contents = [
                     line.strip() for line in f.read().split("\n") if line.strip()

From 1d6c57bcdb94b638982cc35c70ae7b58ff7566b8 Mon Sep 17 00:00:00 2001
From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com>
Date: Wed, 29 Nov 2023 15:12:13 +0100
Subject: [PATCH 20/22] Reformat with Black

---
 examples/speech_to_text/visualize.py | 26 ++++++++------
 simuleval/cli.py                     |  4 ++-
 simuleval/evaluator/evaluator.py     | 51 ++++++++++++++++++++++------
 simuleval/options.py                 |  4 +--
 simuleval/utils/visualize.py         |  1 +
 5 files changed, 63 insertions(+), 23 deletions(-)

diff --git a/examples/speech_to_text/visualize.py b/examples/speech_to_text/visualize.py
index cfdabcc0..92f59f3d 100644
--- a/examples/speech_to_text/visualize.py
+++ b/examples/speech_to_text/visualize.py
@@ -1,20 +1,26 @@
 import os
 import pandas as pd
+import re
 import argparse
 from pprint import pprint
 
 
-def read_scores_from_folder(folder_path):
-    score_file_path = os.path.join(folder_path, "scores.tsv")
-    if os.path.isfile(score_file_path):
-        with open(score_file_path, "r") as f:
-            contents = [line.strip() for line in f.read().split("\n") if line.strip()]
-        return contents
-    else:
-        return None
+def read_scores_from_folder(folder_path, file_pattern=r"scores\.tsv$"):
+    file_pattern = re.compile(file_pattern)
 
+    for file in os.listdir(folder_path):
+        if file_pattern.search(file):
+            score_file_path = os.path.join(folder_path, file)
+            # Read only the first matching entry; later matches are ignored.
+            with open(score_file_path, "r") as f:
+                contents = [
+                    line.strip() for line in f.read().split("\n") if line.strip()
+                ]
+            return contents
+    return None
 
-def read_scores_files(output_folder):
+
+def read_scores_files(output_folder, file_pattern=r"scores\.tsv$"):
     all_contents = []
 
     if not os.path.isdir(output_folder):
@@ -26,7 +32,7 @@ def read_scores_files(output_folder):
         folder_path = os.path.join(output_folder, folder)
 
         if os.path.isdir(folder_path):
-            contents = read_scores_from_folder(folder_path)
+            contents = read_scores_from_folder(folder_path, file_pattern)
             if contents:
                 all_contents.append(contents)
 
diff --git a/simuleval/cli.py b/simuleval/cli.py
index a3796a12..fa0ec2f2 100644
--- a/simuleval/cli.py
+++ b/simuleval/cli.py
@@ -40,7 +40,7 @@ def main():
     if check_argument("score_only"):
         scoring()
         return
-    
+
     if check_argument("visualize"):
         visualize()
         return
@@ -103,6 +103,7 @@ def remote_evaluate():
     # evaluate system
     evaluator.remote_eval()
 
+
 def visualize():
     parser = options.general_parser()
     options.add_visualize_args(parser)
@@ -110,5 +111,6 @@ def visualize():
     visualizer = process_result(args.output, args.metrics)
     print(visualizer)
 
+
 if __name__ == "__main__":
     main()
diff --git a/simuleval/evaluator/evaluator.py b/simuleval/evaluator/evaluator.py
index a0e7e598..b017bea7 100644
--- a/simuleval/evaluator/evaluator.py
+++ b/simuleval/evaluator/evaluator.py
@@ -7,6 +7,7 @@
 import pandas
 import os
 import numbers
+import datetime
 from argparse import Namespace
 from typing import Dict, Generator, Optional
 from .scorers import get_scorer_class
@@ -213,19 +214,48 @@ def results(self):
         df = pandas.DataFrame(new_scores)
         return df
 
-    def dump_results(self) -> None:
+    def create_output_dir(self) -> Path:
+        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
+        output_directory = self.output / f"run-{timestamp}"
+        output_directory.mkdir(exist_ok=True, parents=True)
+        return output_directory
+
+    def dump_results_and_metrics(self) -> None:
         results = self.results
-        if self.output:
-            results.to_csv(self.output / "scores.tsv", sep="\t", index=False)
+        metrics = pandas.DataFrame([ins.metrics for ins in self.instances.values()])
+        metrics = metrics.round(3)
+
+        output_folder = self.create_output_dir()
+
+        results_filename = "scores.tsv"
+        metrics_filename = "metrics.tsv"
+
+        results.to_csv(output_folder / results_filename, sep="\t", index=False)
+        metrics.to_csv(output_folder / metrics_filename, sep="\t", index=False)
 
         logger.info("Results:")
         print(results.to_string(index=False))
 
-    def dump_metrics(self) -> None:
-        metrics = pandas.DataFrame([ins.metrics for ins in self.instances.values()])
-        metrics = metrics.round(3)
-        if self.output:
-            metrics.to_csv(self.output / "metrics.tsv", sep="\t", index=False)
+    # def dump_results(self) -> None:
+    #     results = self.results
+
+    #     timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
+    #     filename = f"results-{timestamp}.tsv"
+
+    #     output_directory = self.output or Path(".")
+
+    #     if self.output:
+    #         output_directory = os.path.join(output_directory, filename)
+    #         results.to_csv(os.path.join(self.output, filename), sep="\t", index=False)
+
+    #     logger.info("Results:")
+    #     print(results.to_string(index=False))
+
+    # def dump_metrics(self) -> None:
+    #     metrics = pandas.DataFrame([ins.metrics for ins in self.instances.values()])
+    #     metrics = metrics.round(3)
+    #     if self.output:
+    #         metrics.to_csv(self.output / "metrics.tsv", sep="\t", index=False)
 
     def is_finished(self, instance) -> bool:
         if hasattr(instance, "source_finished_reading"):
@@ -250,8 +280,9 @@ def __call__(self, system):
             if not self.score_only:
                 self.write_log(instance)
 
-        self.dump_results()
-        self.dump_metrics()
+        # self.dump_results()
+        # self.dump_metrics()
+        self.dump_results_and_metrics()
 
     @classmethod
     def from_args(cls, args):
diff --git a/simuleval/options.py b/simuleval/options.py
index f65f29e8..27e259ea 100644
--- a/simuleval/options.py
+++ b/simuleval/options.py
@@ -195,12 +195,12 @@ def general_parser():
     return parser
 
 
-
 def add_slurm_args(parser):
     parser.add_argument("--slurm-partition", default="", help="Slurm partition.")
     parser.add_argument("--slurm-job-name", default="simuleval", help="Slurm job name.")
     parser.add_argument("--slurm-time", default="2:00:00", help="Slurm partition.")
 
+
 def add_visualize_args(parser):
     parser.add_argument(
         "--output",
@@ -214,4 +214,4 @@ def add_visualize_args(parser):
         nargs="+",
         default=None,
         help="Metrics to be extracted",
-    )
\ No newline at end of file
+    )
diff --git a/simuleval/utils/visualize.py b/simuleval/utils/visualize.py
index 469df7cd..ee4b6330 100644
--- a/simuleval/utils/visualize.py
+++ b/simuleval/utils/visualize.py
@@ -2,6 +2,7 @@
 import pandas as pd
 import re
 
+
 def read_scores_from_folder(folder_path, file_pattern=f"scores\.tsv$"):
     file_pattern = re.compile(file_pattern)
 

From 38f8cb01b91d96708f846528b9940dc5a0c6d323 Mon Sep 17 00:00:00 2001
From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com>
Date: Wed, 29 Nov 2023 15:15:33 +0100
Subject: [PATCH 21/22] Remove circle

---
 .circleci/config.yml | 34 ----------------------------------
 1 file changed, 34 deletions(-)
 delete mode 100644 .circleci/config.yml

diff --git a/.circleci/config.yml b/.circleci/config.yml
deleted file mode 100644
index f0d9287b..00000000
--- a/.circleci/config.yml
+++ /dev/null
@@ -1,34 +0,0 @@
-name: TestPyPI Publish
-
-on:
-  push:
-    branches:
-      - main
-
-jobs:
-  test_pypi_publish:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v2
-
-      - name: Set up Python
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.x
-
-      - name: Install dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y python3-pip
-          pip install pipenv
-          pipenv install twine
-
-      - name: Build and Publish
-        env:
-          TWINE_USERNAME: __token__
-          TWINE_PASSWORD: ${{ secrets.TEST_PYPI_TOKEN }}
-          REPOSITORY_URL: https://test.pypi.org/legacy/
-        run: |
-          python setup.py sdist bdist_wheel
-          pipenv run twine upload --repository-url $REPOSITORY_URL dist/*

From 82fa90c056e9e31d790629822d8b8eaa6a0ed5b7 Mon Sep 17 00:00:00 2001
From: Mubaraq Sani <{ID}+{username}@users.noreply.github.com>
Date: Wed, 29 Nov 2023 15:29:13 +0100
Subject: [PATCH 22/22] Revert back to initial

---
 simuleval/evaluator/evaluator.py | 46 +++++++++-----------------------
 1 file changed, 12 insertions(+), 34 deletions(-)

diff --git a/simuleval/evaluator/evaluator.py b/simuleval/evaluator/evaluator.py
index b017bea7..1e84db1a 100644
--- a/simuleval/evaluator/evaluator.py
+++ b/simuleval/evaluator/evaluator.py
@@ -220,42 +220,21 @@ def create_output_dir(self) -> Path:
         output_directory.mkdir(exist_ok=True, parents=True)
         return output_directory
 
-    def dump_results_and_metrics(self) -> None:
+    def dump_results(self) -> None:
+        """Save results to scores.tsv if self.output is set, then print them."""
         results = self.results
-        metrics = pandas.DataFrame([ins.metrics for ins in self.instances.values()])
-        metrics = metrics.round(3)
-
-        output_folder = self.create_output_dir()
-
-        results_filename = "scores.tsv"
-        metrics_filename = "metrics.tsv"
-
-        results.to_csv(output_folder / results_filename, sep="\t", index=False)
-        metrics.to_csv(output_folder / metrics_filename, sep="\t", index=False)
+        if self.output:
+            results.to_csv(self.output / "scores.tsv", sep="\t", index=False)
 
         logger.info("Results:")
         print(results.to_string(index=False))
 
-    # def dump_results(self) -> None:
-    #     results = self.results
-
-    #     timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
-    #     filename = f"results-{timestamp}.tsv"
-
-    #     output_directory = self.output or Path(".")
-
-    #     if self.output:
-    #         output_directory = os.path.join(output_directory, filename)
-    #         results.to_csv(os.path.join(self.output, filename), sep="\t", index=False)
-
-    #     logger.info("Results:")
-    #     print(results.to_string(index=False))
-
-    # def dump_metrics(self) -> None:
-    #     metrics = pandas.DataFrame([ins.metrics for ins in self.instances.values()])
-    #     metrics = metrics.round(3)
-    #     if self.output:
-    #         metrics.to_csv(self.output / "metrics.tsv", sep="\t", index=False)
+    def dump_metrics(self) -> None:
+        """Save rounded per-instance metrics to metrics.tsv if self.output is set."""
+        metrics = pandas.DataFrame([ins.metrics for ins in self.instances.values()])
+        metrics = metrics.round(3)
+        if self.output:
+            metrics.to_csv(self.output / "metrics.tsv", sep="\t", index=False)
 
     def is_finished(self, instance) -> bool:
         if hasattr(instance, "source_finished_reading"):
@@ -280,9 +259,8 @@ def __call__(self, system):
             if not self.score_only:
                 self.write_log(instance)
 
-        # self.dump_results()
-        # self.dump_metrics()
-        self.dump_results_and_metrics()
+        self.dump_results()
+        self.dump_metrics()
 
     @classmethod
     def from_args(cls, args):