From 36e20e6ccd2883428420a8aa085081b3714c13fd Mon Sep 17 00:00:00 2001 From: Virginia Partridge Date: Tue, 18 Nov 2025 18:15:53 -0500 Subject: [PATCH 01/16] Trying to get the Dockerbuild to work --- .github/workflows/publish_to_pypi.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/publish_to_pypi.yml b/.github/workflows/publish_to_pypi.yml index 818c61b..e555a02 100644 --- a/.github/workflows/publish_to_pypi.yml +++ b/.github/workflows/publish_to_pypi.yml @@ -96,6 +96,7 @@ jobs: uses: docker/metadata-action@v5 with: images: umasscds/autoipaalign + tags: type=ref - name: Login to Docker Hub uses: docker/login-action@v3 @@ -111,12 +112,10 @@ jobs: - name: Build and push uses: docker/build-push-action@v6 + id: push with: - context: . - file: ./Dockerfile push: true tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - name: Generate artifact attestation uses: actions/attest-build-provenance@v3 From d42cb28ea3e4f72786a51625ccf2f5350cb8d315 Mon Sep 17 00:00:00 2001 From: Virginia Partridge Date: Tue, 18 Nov 2025 18:27:40 -0500 Subject: [PATCH 02/16] Remove ref restriction for docker meta --- .github/workflows/publish_to_pypi.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/publish_to_pypi.yml b/.github/workflows/publish_to_pypi.yml index e555a02..78a8e0a 100644 --- a/.github/workflows/publish_to_pypi.yml +++ b/.github/workflows/publish_to_pypi.yml @@ -96,7 +96,6 @@ jobs: uses: docker/metadata-action@v5 with: images: umasscds/autoipaalign - tags: type=ref - name: Login to Docker Hub uses: docker/login-action@v3 From 1dbabf7dcef93d36c77dc887dfd36e21a77d5033 Mon Sep 17 00:00:00 2001 From: Virginia Partridge Date: Tue, 18 Nov 2025 18:37:55 -0500 Subject: [PATCH 03/16] Add permissions to dockerfile --- .github/workflows/publish_to_pypi.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/publish_to_pypi.yml b/.github/workflows/publish_to_pypi.yml index 78a8e0a..109a5c1 100644 --- a/.github/workflows/publish_to_pypi.yml +++ b/.github/workflows/publish_to_pypi.yml @@ -90,6 +90,12 @@ jobs: environment: name: dockerhub + permissions: + packages: write + contents: read + attestations: write + id-token: write + steps: - name: Docker meta id: meta From 9c2c2d6793c1fc16638329e3c2158bffa803d255 Mon Sep 17 00:00:00 2001 From: Virginia Partridge Date: Tue, 18 Nov 2025 18:55:46 -0500 Subject: [PATCH 04/16] Exclude dev dependencies from Dockerfile --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 45bc562..09fea9a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,7 +17,7 @@ WORKDIR /app RUN --mount=type=cache,target=/root/.cache/uv \ --mount=type=bind,source=uv.lock,target=uv.lock \ --mount=type=bind,source=pyproject.toml,target=pyproject.toml \ - uv sync --locked --no-install-project --no-editable + uv sync --locked --no-install-project --no-editable --no-dev # Install our code COPY . /app From 21f76e73eb7a7e7ef5cf7845a8a01e1acbb6801a Mon Sep 17 00:00:00 2001 From: Virginia Partridge Date: Tue, 18 Nov 2025 19:16:28 -0500 Subject: [PATCH 05/16] Add disk space cleanup to docker build --- .github/workflows/publish_to_pypi.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/publish_to_pypi.yml b/.github/workflows/publish_to_pypi.yml index 109a5c1..ad33051 100644 --- a/.github/workflows/publish_to_pypi.yml +++ b/.github/workflows/publish_to_pypi.yml @@ -97,6 +97,12 @@ jobs: id-token: write steps: + - name: Remove unnecessary files and check disk space + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + df . -h + - name: Docker meta id: meta uses: docker/metadata-action@v5 @@ -121,6 +127,7 @@ jobs: with: push: true tags: ${{ steps.meta.outputs.tags }} + no-cache: true - name: Generate artifact attestation uses: actions/attest-build-provenance@v3 From 373c4c562937d9c43f5a89c8d392176033d7e79f Mon Sep 17 00:00:00 2001 From: Virginia Partridge Date: Tue, 18 Nov 2025 19:33:38 -0500 Subject: [PATCH 06/16] Added platform list to docker build --- .github/workflows/publish_to_pypi.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/publish_to_pypi.yml b/.github/workflows/publish_to_pypi.yml index ad33051..5641017 100644 --- a/.github/workflows/publish_to_pypi.yml +++ b/.github/workflows/publish_to_pypi.yml @@ -125,6 +125,7 @@ jobs: uses: docker/build-push-action@v6 id: push with: + platforms: linux/amd64,linux/arm64 push: true tags: ${{ steps.meta.outputs.tags }} no-cache: true From f7d86bf64de3f5192f7f652e8386790c8acdbcd1 Mon Sep 17 00:00:00 2001 From: Virginia Partridge Date: Tue, 18 Nov 2025 19:45:27 -0500 Subject: [PATCH 07/16] Update text of HuggingFace Space intro --- src/autoipaalign/web/app.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/autoipaalign/web/app.py b/src/autoipaalign/web/app.py index d917a0e..b557eac 100644 --- a/src/autoipaalign/web/app.py +++ b/src/autoipaalign/web/app.py @@ -16,8 +16,9 @@ TITLE = "AutoIPA: Automated IPA transcription" INTRO_BLOCK = f"""# {TITLE} -Experiment with producing phonetic transcriptions of uploaded or recorded audio using Wav2Vec2.0-based automatic -speech recognition (ASR) models! +Experiment with producing +[International Phonetic Alphabet (IPA)](https://en.wikipedia.org/wiki/International_Phonetic_Alphabet) transcriptions +of uploaded or recorded audio using Wav2Vec2.0-based automatic speech recognition (ASR) models! The AutoIPA project is a collaboration between Virginia Partridge of the UMass Center for Data Science and Artificial Intelligence and Joe Pater of UMass Linguistics. Its goal is to make automated IPA transcription more useful From 471305c700ff0b63439f7ef7eaed3397bafd9aad Mon Sep 17 00:00:00 2001 From: Virginia Partridge Date: Tue, 18 Nov 2025 19:48:33 -0500 Subject: [PATCH 08/16] Fix typo in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 06eda89..0045990 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ Partridge, Virginia, Joe Pater, Parth Bhangla, Ali Nirheche and Brandon Prickett This is project is structured in multiple subpackages based on their different external dependencies: - **autoipaalign.core**: Core library and command-line interface for IPA transcription and forced alignments. Always installed. - **autoipaalign.compare**: Tools for comparing alignments across different ASR systems, such as whisper and the Montreal Forced Aligner. Install with `pip install autoipaalign[compare]`. -- **autoipaalign.web**: Gradio web interface for interactive transcription. Install with `pip install autoipaalign[compare]`. +- **autoipaalign.web**: Gradio web interface for interactive transcription. Install with `pip install autoipaalign[web]`. ### Basic Installation TODO: Pip install instructions coming soon. From 4815dfef1fdb643eba26d7f0c8f92a897fa6e955 Mon Sep 17 00:00:00 2001 From: Virginia Partridge Date: Tue, 18 Nov 2025 19:50:29 -0500 Subject: [PATCH 09/16] Updated forced alignment button text --- src/autoipaalign/web/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/autoipaalign/web/app.py b/src/autoipaalign/web/app.py index b557eac..0f66b50 100644 --- a/src/autoipaalign/web/app.py +++ b/src/autoipaalign/web/app.py @@ -251,7 +251,7 @@ def launch_demo(): interactive=True, ) - phone_aligned = gr.Checkbox(label="Add forced-alignments for predictions in their own TextGrid") + phone_aligned = gr.Checkbox(label="Add forced-alignments for predictions in their own TextGrid interval tier") model_state = gr.State(value=initial_model) From 6462c477c8d666edf49e3694ce3948ae38ac9486 Mon Sep 17 00:00:00 2001 From: Virginia Partridge Date: Tue, 16 Dec 2025 16:09:43 -0500 Subject: [PATCH 10/16] Updated readme instructions for Docker and pip installation --- README.md | 55 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 0045990..96a4a86 100644 --- a/README.md +++ b/README.md @@ -17,11 +17,14 @@ Partridge, Virginia, Joe Pater, Parth Bhangla, Ali Nirheche and Brandon Prickett ## Basic Usage This is project is structured in multiple subpackages based on their different external dependencies: - **autoipaalign.core**: Core library and command-line interface for IPA transcription and forced alignments. Always installed. -- **autoipaalign.compare**: Tools for comparing alignments across different ASR systems, such as whisper and the Montreal Forced Aligner. Install with `pip install autoipaalign[compare]`. +- **autoipaalign.compare**: Tools for comparing alignments across different ASR systems, such as whisper and the Montreal Forced Aligner. Install with `pip install autoipaalign[compare]`. You should also install the Montral Forced Aligner, see instructions under [External Dependencies](#external-dependencies). - **autoipaalign.web**: Gradio web interface for interactive transcription. Install with `pip install autoipaalign[web]`. ### Basic Installation -TODO: Pip install instructions coming soon. +You can install the `autoipaalign` package with `pip install autoipaalign`. + +We recommend first creating and working in a [Conda Virtual Environment](https://realpython.com/python-virtual-environments-a-primer/#the-conda-package-and-environment-manager) for better integration with Pytorch and the Montreal Forced Aligner. + ### Command-Line Interface The `autoipaalign` command lets you transcribe audio and get TextGrid output files with or without forced alignment. @@ -58,6 +61,23 @@ Then open your browser to the URL shown in the terminal. ## Advanced Usage +### + +### External Dependencies + +- **Montreal Forced Aligner** (optional, for MFA-based comparisons) should be installed when working with the optional `compare` package. + ```bash + # Install via conda + conda install -c conda-forge montreal-forced-aligner + ``` + +### Comparison Tools +Compare alignments from different ASR systems (coming soon). + + +## Development Environment + + ### Installing the Development Workspace This project is structured using [uv workspaces](https://docs.astral.sh/uv/concepts/projects/workspaces/) based on [this template](https://github.com/konstin/uv-workspace-example-cable/tree/main). @@ -73,20 +93,6 @@ This project is structured using [uv workspaces](https://docs.astral.sh/uv/conce uv sync --all-extras ``` -### External Dependencies - -- **Montreal Forced Aligner** (optional, for MFA-based comparisons) should be installed when working with the optional `compare` package. -TODO: update installation instructions for working wiht - ```bash - # Install via conda - conda install -c conda-forge montreal-forced-aligner - ``` - -### Comparison Tools - -Compare alignments from different ASR systems (documentation coming soon). - - ### Running Tests To run unit tests, you can run `uv run pytest` from the root of the repository or inside any of the package subfolders (e.g. `packages/autoipaalign-core`). @@ -100,3 +106,20 @@ Run these checks as follows: uv run ruff check . uv run ruff format . ``` + +### Building Docker image for the web application +To make it easier to deploy and run the web application on HuggingFace Spaces, the application can be packaged as a [Docker](https://docs.docker.com) image. +We've provided a Dockerfile to build an image for the web app. + +You can build an image named `autoipaalign` by running: +```bash +docker build -t autoipaalign . +``` + +Run a Docker container from this image on port 7860: +```bash +docker run -t autoipaalign -p 7860:7860 +``` +You can then access the running web application at `http://localhost:7860`. + +A Docker image is built and pushed to the UMass CDSAI Dockerhub at https://hub.docker.com/repository/docker/umasscds/autoipaalign/general each time a new version of the autoipaalign package is released. From 27b5e16c4b027523973f1d850194b1b9ef018b19 Mon Sep 17 00:00:00 2001 From: Virginia Partridge Date: Tue, 16 Dec 2025 16:11:10 -0500 Subject: [PATCH 11/16] Fixed Docker run command --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 96a4a86..ddb44bb 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ docker build -t autoipaalign . Run a Docker container from this image on port 7860: ```bash -docker run -t autoipaalign -p 7860:7860 +docker run -p 7860:7860 autoipaalign ``` You can then access the running web application at `http://localhost:7860`. From 5394b8d3d4e2b2fc4c12433100d41079f49423a5 Mon Sep 17 00:00:00 2001 From: Virginia Partridge Date: Tue, 16 Dec 2025 16:21:21 -0500 Subject: [PATCH 12/16] Updated README typos and clarity issues --- README.md | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index ddb44bb..3838eaa 100644 --- a/README.md +++ b/README.md @@ -5,19 +5,17 @@ Automatically transcribe audio into the International Phonetic Alphabet (IPA) an The AutoIPA project is a collaboration between Virginia Partridge of the UMass Center for Data Science and Artificial Intelligence and Joe Pater of UMass Linguistics. Its goal is to make automated IPA transcription more useful to linguists (and others!). -Our first step was to fine-tune a Wav2Vec 2.0 model on the Buckeye corpus, which you can try out here. -Our next steps will be to extend our work to other varieties of English and other languages. Please reach out to us if you have any questions or comments about our work or have related work to share! More details are on our [project website](https://websites.umass.edu/comphon/autoipa-automated-ipa-transcription/). If you use our software, please cite our AMP paper: -Partridge, Virginia, Joe Pater, Parth Bhangla, Ali Nirheche and Brandon Prickett. 2025/to appear. [AI-assisted analysis of phonological variation in English](https://docs.google.com/presentation/d/1IJrfokvX5T_fKkiFXmcYEgRI2ZRwgFU4zU1tNC-iYl0/edit?usp=sharing). Special session on Deep Phonology, AMP 2025, UC Berkeley. To appear in the Proceedings of AMP 2025. -""" + +> Partridge, Virginia, Joe Pater, Parth Bhangla, Ali Nirheche and Brandon Prickett. 2025/to appear. [AI-assisted analysis of phonological variation in English](https://docs.google.com/presentation/d/1IJrfokvX5T_fKkiFXmcYEgRI2ZRwgFU4zU1tNC-iYl0/edit?usp=sharing). Special session on Deep Phonology, AMP 2025, UC Berkeley. To appear in the Proceedings of AMP 2025. ## Basic Usage -This is project is structured in multiple subpackages based on their different external dependencies: +This project is structured in multiple subpackages based on their different external dependencies: - **autoipaalign.core**: Core library and command-line interface for IPA transcription and forced alignments. Always installed. -- **autoipaalign.compare**: Tools for comparing alignments across different ASR systems, such as whisper and the Montreal Forced Aligner. Install with `pip install autoipaalign[compare]`. You should also install the Montral Forced Aligner, see instructions under [External Dependencies](#external-dependencies). +- **autoipaalign.compare**: Tools for comparing alignments across different ASR systems, such as whisper and the Montreal Forced Aligner. Install with `pip install autoipaalign[compare]`. You should also install the Montreal Forced Aligner, see instructions under [External Dependencies](#external-dependencies). - **autoipaalign.web**: Gradio web interface for interactive transcription. Install with `pip install autoipaalign[web]`. ### Basic Installation @@ -55,14 +53,12 @@ autoipaalign transcribe --audio-paths audio.wav --output-target output/ --asr.mo ### Web Interface ```bash -python -m autoipaalign_web.app +python -m autoipaalign.web.app ``` Then open your browser to the URL shown in the terminal. ## Advanced Usage -### - ### External Dependencies - **Montreal Forced Aligner** (optional, for MFA-based comparisons) should be installed when working with the optional `compare` package. @@ -86,7 +82,7 @@ This project is structured using [uv workspaces](https://docs.astral.sh/uv/conce curl -LsSf https://astral.sh/uv/install.sh | sh ``` -2. Clone the repository and install to set up development and testing dependencies:: +2. Clone the repository and install to set up development and testing dependencies: ```bash git clone cd autoipaalign From e9587fba6eaab0b065d053db94c083c38f8ebab1 Mon Sep 17 00:00:00 2001 From: Virginia Partridge Date: Tue, 16 Dec 2025 16:25:38 -0500 Subject: [PATCH 13/16] Updating changelog for actual release --- CHANGELOG.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 07c005d..72ad680 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,10 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -## [v1.0.0] - 2025-11-18 +## [v1.0.0] - 2025-12-16 ### Added - Command line interface to transcribe audio with HuggingFace ASR models and export them as TextGrid - Option to do forced alignment with the ASR model's vocabulary and add them as time intervals to TextGrid - Gradio web app as an interactive wrapper around the command line structure -- Unit tests and overall package structure \ No newline at end of file +- Unit tests and overall package structure +- Added Docker image building \ No newline at end of file From 3d6cb4df15be7e37fff6b0088e79eab83541ddbb Mon Sep 17 00:00:00 2001 From: Virginia Partridge Date: Tue, 16 Dec 2025 16:28:38 -0500 Subject: [PATCH 14/16] Getting rid of auto complete nonsense in changelog --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 72ad680..fae5399 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,4 @@ # Changelog - All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), From 0d8125ce03d31d07abdc06df0abcd49f486e9b7f Mon Sep 17 00:00:00 2001 From: Virginia Partridge Date: Tue, 16 Dec 2025 16:36:04 -0500 Subject: [PATCH 15/16] Added alternate text to smoke test check --- tests/core_smoke_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/core_smoke_test.py b/tests/core_smoke_test.py index 5d15ab4..3943c8c 100644 --- a/tests/core_smoke_test.py +++ b/tests/core_smoke_test.py @@ -57,7 +57,8 @@ def test_cli_main_callable(): # Check for expected error message in either stdout or stderr expected_text = "The following arguments are required: {transcribe,transcribe-intervals}" - if expected_text not in stderr_output: + alternate_expected_text = "Expected one of {transcribe, transcribe-intervals}." + if expected_text not in stderr_output or alternate_expected_text not in stderr_output: raise AssertionError(f"Expected error message not found. Output: {stderr_output}") finally: # Ensure stdout/stderr are always restored From 163c6686476cb34a93b1db5d36c8b9b64210e7eb Mon Sep 17 00:00:00 2001 From: Virginia Partridge Date: Tue, 16 Dec 2025 16:43:03 -0500 Subject: [PATCH 16/16] Smoke test flags corrected --- tests/core_smoke_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/core_smoke_test.py b/tests/core_smoke_test.py index 3943c8c..2f595d5 100644 --- a/tests/core_smoke_test.py +++ b/tests/core_smoke_test.py @@ -58,7 +58,7 @@ def test_cli_main_callable(): # Check for expected error message in either stdout or stderr expected_text = "The following arguments are required: {transcribe,transcribe-intervals}" alternate_expected_text = "Expected one of {transcribe, transcribe-intervals}." - if expected_text not in stderr_output or alternate_expected_text not in stderr_output: + if not (expected_text in stderr_output or alternate_expected_text in stderr_output): raise AssertionError(f"Expected error message not found. Output: {stderr_output}") finally: # Ensure stdout/stderr are always restored