analyticsinmotion · rossarmstrong · Dec 15, 2025 · Dec 14, 2025 · Dec 14, 2025 · Dec 15, 2025
diff --git a/.gitignore b/.gitignore
@@ -185,3 +185,7 @@ cython_debug/
 # Meson build directories
 build/
 builddir/
+
+# Personal development workspace (local testing only)
+development/
+!development/README.md
diff --git a/.pylintrc b/.pylintrc
@@ -2,7 +2,15 @@
 max-line-length = 120
 
 [pylint]
-good-names=i,j,m,n,ld,df
+good-names=i,j,m,n,ld,df,ref,hyp
 
 [MESSAGES CONTROL]
-disable=too-many-locals
+disable=
+    too-many-locals,
+    import-error,
+    duplicate-code
+
+[MASTER]
+# Ignore benchmark, development, and docs paths, which may use optional dependencies
+ignore-paths=
+    ^(benchmarks|development|docs)[\\/].*$
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,33 @@
 This changelog file outlines a chronologically ordered list of the changes made on this project.
 It is organized by version and release date followed by a list of Enhancements, New Features, Bug Fixes, and/or Breaking Changes.
 
+## Version 3.2.0
+
+**Released:** December 15, 2025
+**Tag:** v3.2.0
+
+### Enhancements
+
+- Refactored metrics computation architecture to eliminate `np.vectorize()` overhead by replacing it with a C-level batch processing loop (`_metrics_batch()`). This provides cleaner code structure and establishes a foundation for future performance optimizations without introducing any performance regression.
+
+- Fixed a double-computation bug where `error_handler()` called `metrics()` for validation and the wrapper functions then computed the metrics a second time. The `error_handler()` function is now validation-only, and all metric calculations happen through a single unified `metrics()` entry point, improving efficiency and code maintainability.
+
+- Standardized internal metrics return format to row-based `(n, 9)` array structure instead of columnar format. This simplifies DataFrame construction in `summary()` and `summaryp()` functions by eliminating complex transpose operations and reducing code complexity.
+
+- Improved code organization with unified `metrics()` router function that dispatches to either single-pair `calculations()` or batch `_metrics_batch()` processing, providing a cleaner and more maintainable architecture for metric computation.
+
+- Updated the Pylint configuration to suppress import errors for Cython modules during static analysis and to exclude the benchmark, development, and docs directories from linting. This resolves CI/CD build failures while maintaining code quality standards for the core package.
+
+- Optimized Levenshtein distance algorithm in `calculations()` function with C-level performance improvements: replaced `np.zeros()` with `np.empty()` to eliminate redundant initialization, moved boundary condition initialization outside the main DP loop to remove conditional branches from the hot path, and replaced Python's `min()` function with manual C-level sequential comparisons.
+
+- Implemented a dual-path architecture with a fast-path optimization for functions that don't require word tracking. Added three new functions (`calculations_fast()`, `_metrics_batch_fast()`, `metrics_fast()`) that skip word list construction and return float64 arrays instead of object arrays. Updated the `wer()`, `wers()`, `werp()`, and `werps()` functions to use the fast path, achieving a performance improvement on synthetic benchmarks. Functions requiring word tracking (`summary()` and `summaryp()`) continue to use the full path.
+
+### Bug Fixes
+
+- Expanded try/except scope in all wrapper functions (`wer.py`, `wers.py`, `werp.py`, `werps.py`, `summary.py`, `summaryp.py`) to properly catch exceptions from both validation (`error_handler()`) and computation (`metrics()`/`metrics_fast()`). This fixes 6 pre-existing test failures where invalid input types (e.g., lists of integers) would crash instead of returning None with an error message.
+
+- Added division-by-zero guards in `calculations_fast()` function (`wer = (<double>ld) / m if m > 0 else 0.0`) and corpus-level wrapper functions (`wer.py`, `werp.py`) to prevent crashes on empty input. Also added per-row masked division in `werps.py` to handle cases where individual samples have zero reference length.
+
 ## Version 3.1.1
 
 **Released:** December 14, 2025  

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -19,7 +19,7 @@
 project = "werpy"
 copyright = f'{datetime.now().year} <a href="https://www.analyticsinmotion.com">Analytics in Motion</a>'
 author = "Ross Armstrong"
-release = "3.1.1"
+release = "3.2.0"
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

diff --git a/meson.build b/meson.build
@@ -1,7 +1,7 @@
 project(
     'werpy', 
     'c', 'cython',
-    version : '3.1.1',
+    version : '3.2.0',
     license: 'BSD-3',
     meson_version: '>= 1.1.0',
     default_options : [

diff --git a/pyproject.toml b/pyproject.toml
@@ -9,7 +9,7 @@ requires = [
 
 [project]
 name = 'werpy'
-version = '3.1.1'
+version = '3.2.0'
 description = 'A powerful yet lightweight Python package to calculate and analyze the Word Error Rate (WER).'
 readme = 'README.md'
 requires-python = '>=3.10'

diff --git a/werpy/__init__.py b/werpy/__init__.py
@@ -5,7 +5,7 @@
 The werpy package provides tools for calculating word error rates (WERs) and related metrics on text data.
 """
 
-__version__ = "3.1.1"
+__version__ = "3.2.0"
 
 from .errorhandler import error_handler
 from .normalize import normalize

diff --git a/werpy/errorhandler.py b/werpy/errorhandler.py
@@ -2,15 +2,19 @@
 # SPDX-License-Identifier: BSD-3-Clause
 
 """
-Responsible for defining custom exceptions and handling errors across the package.
+Input validation and consistent exceptions for werpy public functions.
 """
 
-from .metrics import metrics
+import numpy as np
 
 
 def error_handler(reference, hypothesis):
     """
-    This function provides the overall wrapper to handle exceptions within this package.
+    Validate inputs and raise consistent exceptions.
+
+    This function does not compute metrics. Computation is handled by:
+    - metrics.metrics (router) for strings and batches
+    - metrics.calculations for a single pair
 
     Parameters
     ----------
@@ -30,24 +34,43 @@ def error_handler(reference, hypothesis):
 
     Returns
     -------
-    np.ndarray
-        This function will return a ragged array containing the Word Error Rate, Levenshtein distance, the number of
-        words in the reference sequence, insertions count, deletions count, substitutions count, a list of inserted
-        words, a list of deleted words and a list of substituted words.
+    bool
+        True if validation passes.
     """
-    try:
-        word_error_rate_breakdown = metrics(reference, hypothesis)
-    except ValueError as exc:
-        raise ValueError(
-            "The Reference and Hypothesis input parameters must have the same number of elements."
-        ) from exc
-    except AttributeError as exc:
+    valid_types = (str, list, np.ndarray)
+
+    if not isinstance(reference, valid_types) or not isinstance(hypothesis, valid_types):
         raise AttributeError(
             "All text should be in a string format. Please check your input does not include any "
             "Numeric data types."
-        ) from exc
-    except ZeroDivisionError as exc:
+        )
+
+    ref_is_seq = isinstance(reference, (list, np.ndarray))
+    hyp_is_seq = isinstance(hypothesis, (list, np.ndarray))
+
+    if ref_is_seq != hyp_is_seq:
+        raise AttributeError(
+            "Reference and hypothesis must both be strings, or both be lists/arrays."
+        )
+
+    if ref_is_seq and hyp_is_seq:
+        if len(reference) != len(hypothesis):
+            raise ValueError(
+                "The Reference and Hypothesis input parameters must have the same number of elements."
+            )
+        return True
+
+    # At this point, both are strings (validated above)
+    ref_s = str(reference).strip()
+    hyp_s = str(hypothesis).strip()
+
+    if ref_s == "" and hyp_s == "":
         raise ZeroDivisionError(
             "Invalid input: reference must not be blank, and reference and hypothesis cannot both be empty."
-        ) from exc
-    return word_error_rate_breakdown
+        )
+    if ref_s == "":
+        raise ZeroDivisionError(
+            "Invalid input: reference must not be blank, and reference and hypothesis cannot both be empty."
+        )
+
+    return True