From ec23dba97ee53a7cfca00a305712240058819472 Mon Sep 17 00:00:00 2001 From: MikeSmithEU Date: Thu, 5 Nov 2020 22:45:41 +0100 Subject: [PATCH 1/5] levenshtein (tmp commit, incorrect opcodes) --- src/benchmarkstt/diff/core.py | 50 +++++++++++++++++++++---- src/benchmarkstt/metrics/core.py | 27 +++++-------- tests/benchmarkstt/test_benchmarkstt.py | 10 ----- tests/benchmarkstt/test_cli.py | 32 +++++++++++----- tests/benchmarkstt/test_diff.py | 48 +++++++++++++++++++----- tests/benchmarkstt/test_metrics_core.py | 4 +- 6 files changed, 116 insertions(+), 55 deletions(-) diff --git a/src/benchmarkstt/diff/core.py b/src/benchmarkstt/diff/core.py index 69efe442..49b99318 100644 --- a/src/benchmarkstt/diff/core.py +++ b/src/benchmarkstt/diff/core.py @@ -4,6 +4,7 @@ from difflib import SequenceMatcher from benchmarkstt.diff import Differ +import edit_distance class RatcliffObershelp(Differ): @@ -12,9 +13,7 @@ class RatcliffObershelp(Differ): From difflib.SequenceMatcher_ (Copyright_ 2001-2020, Python Software Foundation.) - SequenceMatcher is a flexible class for comparing pairs of sequences of - any type, so long as the sequence elements are hashable. The basic - algorithm predates, and is a little fancier than, an algorithm + The basic algorithm predates, and is a little fancier than, an algorithm published in the late 1980's by Ratcliff and Obershelp under the hyperbolic name "gestalt pattern matching". The basic idea is to find the longest contiguous matching subsequence that contains no "junk" @@ -29,11 +28,48 @@ class RatcliffObershelp(Differ): """ def __init__(self, a, b, **kwargs): - if 'autojunk' not in kwargs: - kwargs['autojunk'] = False kwargs['a'] = a kwargs['b'] = b - self.matcher = SequenceMatcher(**kwargs) + self._kwargs = kwargs + self._matcher = SequenceMatcher(**self._kwargs) def get_opcodes(self): - return self.matcher.get_opcodes() + return self._matcher.get_opcodes() + + +class Levenshtein(Differ): + """ + Levenshtein_ distance is the minimum edit distance. + + .. _Levenshtein: https://en.wikipedia.org/wiki/Levenshtein_distance + """ + + def __init__(self, a, b, **kwargs): + kwargs['a'] = a + kwargs['b'] = b + if 'action_function' not in kwargs: + kwargs['action_function'] = edit_distance.highest_match_action + self._kwargs = kwargs + self._matcher = edit_distance.SequenceMatcher(**self._kwargs) + + def get_opcodes(self): + return self.simplify_opcodes(self._matcher.get_opcodes()) + + @staticmethod + def simplify_opcodes(opcodes): + new_codes = [] + prev = None + for cur in opcodes: + if prev is None: + prev = cur + elif cur[0] == prev[0]: + prev[2] = cur[2] + prev[4] = cur[4] + else: + new_codes.append(tuple(prev)) + prev = cur + + if prev is not None: + new_codes.append(tuple(prev)) + + return new_codes diff --git a/src/benchmarkstt/metrics/core.py b/src/benchmarkstt/metrics/core.py index 99fcc79f..4c419edc 100644 --- a/src/benchmarkstt/metrics/core.py +++ b/src/benchmarkstt/metrics/core.py @@ -1,6 +1,5 @@ from benchmarkstt.schema import Schema import logging -import json from benchmarkstt.diff import Differ from benchmarkstt.diff.core import RatcliffObershelp from benchmarkstt.diff.formatter import format_diff @@ -44,9 +43,10 @@ def get_opcode_counts(opcodes) -> OpcodeCounts: def get_differ(a, b, differ_class: Differ): - if differ_class is None: - # differ_class = HuntMcIlroy + if differ_class is None or differ_class == '': differ_class = RatcliffObershelp + elif type(differ_class) is str: + differ_class = differ_factory[differ_class] return differ_class(traversible(a), traversible(b)) @@ -54,12 +54,13 @@ class WordDiffs(Metric): """ Present differences on a per-word basis + :param differ_class: see :py:mod:`benchmarkstt.Differ.core` :param dialect: Presentation format. Default is 'ansi'. + :example differ_class: 'levenshtein' :example dialect: 'html' - :param differ_class: For future use. """ - def __init__(self, dialect=None, differ_class: Differ = None): + def __init__(self, differ_class: Differ = None, dialect: str = None): self._differ_class = differ_class self._dialect = dialect @@ -92,21 +93,13 @@ class WER(Metric): See https://docs.python.org/3/library/difflib.html - [Mode: 'levenshtein'] In the context of WER, Levenshtein - distance is the minimum edit distance computed at the - word level. This implementation uses the Editdistance - c++ implementation by Hiroyuki Tanaka: - https://github.com/aflc/editdistance. See: - https://en.wikipedia.org/wiki/Levenshtein_distance - :param mode: 'strict' (default), 'hunt' or 'levenshtein'. - :param differ_class: For future use. + :param differ_class: see :py:mod:`benchmarkstt.Differ.core` """ # WER modes MODE_STRICT = 'strict' MODE_HUNT = 'hunt' - MODE_LEVENSHTEIN = 'levenshtein' DEL_PENALTY = 1 INS_PENALTY = 1 @@ -114,8 +107,6 @@ class WER(Metric): def __init__(self, mode=None, differ_class: Differ = None): self._mode = mode - if mode == self.MODE_LEVENSHTEIN: - return if differ_class is None: differ_class = RatcliffObershelp @@ -129,7 +120,7 @@ def compare(self, ref: Schema, hyp: Schema) -> float: hyp_list = [i['item'] for i in hyp] total_ref = len(ref_list) if total_ref == 0: - return 0 if len(hyp_list) == 0 else 1 + return 1 return editdistance.eval(ref_list, hyp_list) / total_ref diffs = get_differ(ref, hyp, differ_class=self._differ_class) @@ -197,6 +188,8 @@ def compare(self, ref: Schema, hyp: Schema): class DiffCounts(Metric): """ Get the amount of differences between reference and hypothesis + + :param differ_class: see :py:mod:`benchmarkstt.Differ.core` """ def __init__(self, differ_class: Differ = None): diff --git a/tests/benchmarkstt/test_benchmarkstt.py b/tests/benchmarkstt/test_benchmarkstt.py index f7a9d5a1..f7469f16 100644 --- a/tests/benchmarkstt/test_benchmarkstt.py +++ b/tests/benchmarkstt/test_benchmarkstt.py @@ -11,16 +11,6 @@ def _(): return _ -class ToDefer: - def __init__(self, value): - self.value = value - self.cb_count = 0 - - def __repr__(self): - self.cb_count += 1 - return '' % (repr(self.value),) - - def test_deferred_str(): callback = cb('test') deferred = DeferredCallback(callback) diff --git a/tests/benchmarkstt/test_cli.py b/tests/benchmarkstt/test_cli.py index 6d1c4061..02f444f2 100644 --- a/tests/benchmarkstt/test_cli.py +++ b/tests/benchmarkstt/test_cli.py @@ -71,16 +71,28 @@ ']}\n]\n' ], ['normalization -i ./resources/test/_data/candide.txt ./resources/test/_data/candide.txt -o /dev/null', 2], - ['metrics -r "HELLO WORLD OF MINE" --hypothesis "GOODBYE CRUEL WORLD OF MINE" -rt argument -ht argument ' - '--worddiffs --output-format json', - '[\n\t{"title": "worddiffs", "result": [' - '{"type": "replace", "reference": "HELLO", "hypothesis": "GOODBYE"}, ' - '{"type": "insert", "reference": null, "hypothesis": "CRUEL"}, ' - '{"type": "equal", "reference": "WORLD", "hypothesis": "WORLD"}, ' - '{"type": "equal", "reference": "OF", "hypothesis": "OF"}, ' - '{"type": "equal", "reference": "MINE", "hypothesis": "MINE"}' - ']}\n]\n' - ], + [ + 'metrics -r "HELLO WORLD OF MINE" --hypothesis "GOODBYE CRUEL WORLD OF MINE" -rt argument -ht argument ' + '--worddiffs levenshtein --output-format json', + '[\n\t{"title": "worddiffs", "result": [' + '{"type": "replace", "reference": "HELLO", "hypothesis": "GOODBYE"}, ' + '{"type": "insert", "reference": null, "hypothesis": "CRUEL"}, ' + '{"type": "equal", "reference": "WORLD", "hypothesis": "WORLD"}, ' + '{"type": "equal", "reference": "OF", "hypothesis": "OF"}, ' + '{"type": "equal", "reference": "MINE", "hypothesis": "MINE"}' + ']}\n]\n' + ], + [ + 'metrics -r "HELLO WORLD OF MINE" --hypothesis "GOODBYE CRUEL WORLD OF MINE" -rt argument -ht argument ' + '--worddiffs --output-format json', + '[\n\t{"title": "worddiffs", "result": [' + '{"type": "replace", "reference": "HELLO", "hypothesis": "GOODBYE"}, ' + '{"type": "insert", "reference": null, "hypothesis": "CRUEL"}, ' + '{"type": "equal", "reference": "WORLD", "hypothesis": "WORLD"}, ' + '{"type": "equal", "reference": "OF", "hypothesis": "OF"}, ' + '{"type": "equal", "reference": "MINE", "hypothesis": "MINE"}' + ']}\n]\n' + ], ['metrics -r "HELLO CRUEL WORLD OF MINE" -h "GOODBYE WORLD OF MINE" -rt argument -ht argument ' '--worddiffs --output-format json', '[\n\t{"title": "worddiffs", "result": [' diff --git a/tests/benchmarkstt/test_diff.py b/tests/benchmarkstt/test_diff.py index 742038c7..285754bf 100644 --- a/tests/benchmarkstt/test_diff.py +++ b/tests/benchmarkstt/test_diff.py @@ -1,10 +1,35 @@ from benchmarkstt import diff +from benchmarkstt.diff.core import RatcliffObershelp import pytest -differs = [differ.cls for differ in diff.factory] +differs = [differ.cls for differ in diff.factory if differ.name != 'levenshtein'] differs_decorator = pytest.mark.parametrize('differ', differs) +@differs_decorator +def test_simplest(differ): + sm = differ( + list('012345'), + list('023345') + ) + assert list(sm.get_opcodes()) == [('equal', 0, 1, 0, 1), + ('delete', 1, 2, 1, 1), + ('equal', 2, 3, 1, 2), + ('insert', 3, 3, 2, 3), + ('equal', 3, 6, 3, 6)] + + +@differs_decorator +def test_simple(differ): + sm = differ( + '0123456HIJkopq', + '0123456HIJKlmnopq' + ) + assert list(sm.get_opcodes()) == [('equal', 0, 10, 0, 10), + ('replace', 10, 11, 10, 14), + ('equal', 11, 14, 14, 17)] + + @differs_decorator def test_one_insert(differ): sm = differ('b' * 100, 'a' + 'b' * 100) @@ -14,15 +39,6 @@ def test_one_insert(differ): assert list(sm.get_opcodes()) == [('equal', 0, 50, 0, 50), ('insert', 50, 50, 50, 51), ('equal', 50, 100, 51, 101)] - ref = "a b c d e f" - hyp = "a b d e kfmod fgdjn idf giudfg diuf dufg idgiudgd" - sm = differ(ref, hyp) - assert list(sm.get_opcodes()) == [('equal', 0, 3, 0, 3), - ('delete', 3, 5, 3, 3), - ('equal', 5, 10, 3, 8), - ('insert', 10, 10, 8, 9), - ('equal', 10, 11, 9, 10), - ('insert', 11, 11, 10, 49)] @differs_decorator @@ -31,3 +47,15 @@ def test_one_delete(differ): assert list(sm.get_opcodes()) == [('equal', 0, 40, 0, 40), ('delete', 40, 41, 40, 40), ('equal', 41, 81, 40, 80)] + + +def test_ratcliffobershelp(): + ref = "a b c d e f" + hyp = "a b d e kfmod fgdjn idf giudfg diuf dufg idgiudgd" + sm = RatcliffObershelp(ref, hyp) + assert list(sm.get_opcodes()) == [('equal', 0, 3, 0, 3), + ('delete', 3, 5, 3, 3), + ('equal', 5, 10, 3, 8), + ('insert', 10, 10, 8, 9), + ('equal', 10, 11, 9, 10), + ('insert', 11, 11, 10, 49)] diff --git a/tests/benchmarkstt/test_metrics_core.py b/tests/benchmarkstt/test_metrics_core.py index a00ffe45..55fc7eff 100644 --- a/tests/benchmarkstt/test_metrics_core.py +++ b/tests/benchmarkstt/test_metrics_core.py @@ -38,7 +38,8 @@ def test_wer(a, b, exp): assert WER(mode=WER.MODE_STRICT).compare(PlainText(a), PlainText(b)) == wer_strict assert WER(mode=WER.MODE_HUNT).compare(PlainText(a), PlainText(b)) == wer_hunt - assert WER(mode=WER.MODE_LEVENSHTEIN).compare(PlainText(a), PlainText(b)) == wer_levenshtein + assert WER(differ_class='levenshtein').compare(PlainText(a), PlainText(b)) == wer_levenshtein + @pytest.mark.parametrize('a,b,entities_list,weights,exp_beer,exp_occ', [ @@ -113,3 +114,4 @@ def test_cer(a, b, exp): cer_levenshtein, = exp assert CER(mode=CER.MODE_LEVENSHTEIN).compare(PlainText(a), PlainText(b)) == cer_levenshtein + From 950c08122c75522f9317f963af155a0a6e98a766 Mon Sep 17 00:00:00 2001 From: MikeSmithEU Date: Sat, 7 Nov 2020 22:50:13 +0100 Subject: [PATCH 2/5] add get_opcode_counts to Diff classes (from metrics) --- src/benchmarkstt/diff/__init__.py | 37 ++++++++++++++-- src/benchmarkstt/metrics/core.py | 27 +----------- tests/benchmarkstt/test_cli.py | 42 +++++++++--------- tests/benchmarkstt/test_diff.py | 73 +++++++++++++++++++------------ 4 files changed, 100 insertions(+), 79 deletions(-) diff --git a/src/benchmarkstt/diff/__init__.py b/src/benchmarkstt/diff/__init__.py index 889d4ef5..5c537778 100644 --- a/src/benchmarkstt/diff/__init__.py +++ b/src/benchmarkstt/diff/__init__.py @@ -2,11 +2,39 @@ Responsible for calculating differences. """ -from abc import ABC, abstractmethod +from abc import ABC, ABCMeta, abstractmethod from benchmarkstt.factory import CoreFactory +from collections import namedtuple -class Differ(ABC): +OpcodeCounts = namedtuple('OpcodeCounts', + ('equal', 'replace', 'insert', 'delete')) + + +def get_opcode_counts(opcodes) -> OpcodeCounts: + counts = OpcodeCounts(0, 0, 0, 0)._asdict() + for tag, alo, ahi, blo, bhi in opcodes: + if tag == 'equal': + counts[tag] += ahi - alo + elif tag == 'insert': + counts[tag] += bhi - blo + elif tag == 'delete': + counts[tag] += ahi - alo + elif tag == 'replace': + ca = ahi - alo + cb = bhi - blo + if ca < cb: + counts['insert'] += cb - ca + counts['replace'] += ca + elif ca > cb: + counts['delete'] += ca - cb + counts['replace'] += cb + else: + counts[tag] += ahi - alo + return OpcodeCounts(counts['equal'], counts['replace'], counts['insert'], counts['delete']) + + +class DifferInterface(ABC): @abstractmethod def __init__(self, a, b): """ @@ -32,5 +60,8 @@ def get_opcodes(self): """ raise NotImplementedError() + @abstractmethod + def get_opcode_counts(self): + raise NotImplementedError() -factory = CoreFactory(Differ, False) +factory = CoreFactory(DifferInterface, False) diff --git a/src/benchmarkstt/metrics/core.py b/src/benchmarkstt/metrics/core.py index 4c419edc..2902ad7a 100644 --- a/src/benchmarkstt/metrics/core.py +++ b/src/benchmarkstt/metrics/core.py @@ -19,29 +19,6 @@ def traversible(schema, key=None): return [word[key] for word in schema] -def get_opcode_counts(opcodes) -> OpcodeCounts: - counts = OpcodeCounts(0, 0, 0, 0)._asdict() - for tag, alo, ahi, blo, bhi in opcodes: - if tag == 'equal': - counts[tag] += ahi - alo - elif tag == 'insert': - counts[tag] += bhi - blo - elif tag == 'delete': - counts[tag] += ahi - alo - elif tag == 'replace': - ca = ahi - alo - cb = bhi - blo - if ca < cb: - counts['insert'] += cb - ca - counts['replace'] += ca - elif ca > cb: - counts['delete'] += ca - cb - counts['replace'] += cb - else: - counts[tag] += ahi - alo - return OpcodeCounts(counts['equal'], counts['replace'], counts['insert'], counts['delete']) - - def get_differ(a, b, differ_class: Differ): if differ_class is None or differ_class == '': differ_class = RatcliffObershelp @@ -125,7 +102,7 @@ def compare(self, ref: Schema, hyp: Schema) -> float: diffs = get_differ(ref, hyp, differ_class=self._differ_class) - counts = get_opcode_counts(diffs.get_opcodes()) + counts = diffs.get_opcode_counts() changes = counts.replace * self.SUB_PENALTY + \ counts.delete * self.DEL_PENALTY + \ @@ -199,7 +176,7 @@ def __init__(self, differ_class: Differ = None): def compare(self, ref: Schema, hyp: Schema) -> OpcodeCounts: diffs = get_differ(ref, hyp, differ_class=self._differ_class) - return get_opcode_counts(diffs.get_opcodes()) + return diffs.get_opcode_counts() class BEER(Metric): diff --git a/tests/benchmarkstt/test_cli.py b/tests/benchmarkstt/test_cli.py index 02f444f2..46c8bb4f 100644 --- a/tests/benchmarkstt/test_cli.py +++ b/tests/benchmarkstt/test_cli.py @@ -71,28 +71,26 @@ ']}\n]\n' ], ['normalization -i ./resources/test/_data/candide.txt ./resources/test/_data/candide.txt -o /dev/null', 2], - [ - 'metrics -r "HELLO WORLD OF MINE" --hypothesis "GOODBYE CRUEL WORLD OF MINE" -rt argument -ht argument ' - '--worddiffs levenshtein --output-format json', - '[\n\t{"title": "worddiffs", "result": [' - '{"type": "replace", "reference": "HELLO", "hypothesis": "GOODBYE"}, ' - '{"type": "insert", "reference": null, "hypothesis": "CRUEL"}, ' - '{"type": "equal", "reference": "WORLD", "hypothesis": "WORLD"}, ' - '{"type": "equal", "reference": "OF", "hypothesis": "OF"}, ' - '{"type": "equal", "reference": "MINE", "hypothesis": "MINE"}' - ']}\n]\n' - ], - [ - 'metrics -r "HELLO WORLD OF MINE" --hypothesis "GOODBYE CRUEL WORLD OF MINE" -rt argument -ht argument ' - '--worddiffs --output-format json', - '[\n\t{"title": "worddiffs", "result": [' - '{"type": "replace", "reference": "HELLO", "hypothesis": "GOODBYE"}, ' - '{"type": "insert", "reference": null, "hypothesis": "CRUEL"}, ' - '{"type": "equal", "reference": "WORLD", "hypothesis": "WORLD"}, ' - '{"type": "equal", "reference": "OF", "hypothesis": "OF"}, ' - '{"type": "equal", "reference": "MINE", "hypothesis": "MINE"}' - ']}\n]\n' - ], + ['metrics -r "HELLO WORLD OF MINE" --hypothesis "GOODBYE CRUEL WORLD OF MINE" -rt argument -ht argument ' + '--worddiffs ratcliffobershelp --output-format json', + '[\n\t{"title": "worddiffs", "result": [' + '{"type": "replace", "reference": "HELLO", "hypothesis": "GOODBYE"}, ' + '{"type": "insert", "reference": null, "hypothesis": "CRUEL"}, ' + '{"type": "equal", "reference": "WORLD", "hypothesis": "WORLD"}, ' + '{"type": "equal", "reference": "OF", "hypothesis": "OF"}, ' + '{"type": "equal", "reference": "MINE", "hypothesis": "MINE"}' + ']}\n]\n' + ], + ['metrics -r "HELLO WORLD OF MINE" --hypothesis "GOODBYE CRUEL WORLD OF MINE" -rt argument -ht argument ' + '--worddiffs --output-format json', + '[\n\t{"title": "worddiffs", "result": [' + '{"type": "replace", "reference": "HELLO", "hypothesis": "GOODBYE"}, ' + '{"type": "insert", "reference": null, "hypothesis": "CRUEL"}, ' + '{"type": "equal", "reference": "WORLD", "hypothesis": "WORLD"}, ' + '{"type": "equal", "reference": "OF", "hypothesis": "OF"}, ' + '{"type": "equal", "reference": "MINE", "hypothesis": "MINE"}' + ']}\n]\n' + ], ['metrics -r "HELLO CRUEL WORLD OF MINE" -h "GOODBYE WORLD OF MINE" -rt argument -ht argument ' '--worddiffs --output-format json', '[\n\t{"title": "worddiffs", "result": [' diff --git a/tests/benchmarkstt/test_diff.py b/tests/benchmarkstt/test_diff.py index 285754bf..5504eb83 100644 --- a/tests/benchmarkstt/test_diff.py +++ b/tests/benchmarkstt/test_diff.py @@ -1,22 +1,32 @@ from benchmarkstt import diff -from benchmarkstt.diff.core import RatcliffObershelp +from benchmarkstt.diff.core import RatcliffObershelp, Levenshtein import pytest differs = [differ.cls for differ in diff.factory if differ.name != 'levenshtein'] differs_decorator = pytest.mark.parametrize('differ', differs) +all_differs = [differ.cls for differ in diff.factory] +all_differs_decorator = pytest.mark.parametrize('differ', all_differs) -@differs_decorator -def test_simplest(differ): - sm = differ( - list('012345'), - list('023345') - ) - assert list(sm.get_opcodes()) == [('equal', 0, 1, 0, 1), - ('delete', 1, 2, 1, 1), - ('equal', 2, 3, 1, 2), - ('insert', 3, 3, 2, 3), - ('equal', 3, 6, 3, 6)] + +def clean_opcode(opcode): + kind, alo, ahi, blo, bhi = opcode + if kind == 'delete': # blo and bhi are irrelevant + blo = bhi = None + elif kind == 'insert': + ahi = None + return kind, alo, ahi, blo, bhi + + +def clean_opcodes(opcodes): + return list(map(clean_opcode, opcodes)) + + +def test_simple_levenshtein_ratcliff_similarity(): + a = list('012345') + b = list('023x45') + assert clean_opcodes(Levenshtein(a, b).get_opcodes()) == \ + clean_opcodes(RatcliffObershelp(a, b).get_opcodes()) @differs_decorator @@ -25,37 +35,42 @@ def test_simple(differ): '0123456HIJkopq', '0123456HIJKlmnopq' ) - assert list(sm.get_opcodes()) == [('equal', 0, 10, 0, 10), - ('replace', 10, 11, 10, 14), - ('equal', 11, 14, 14, 17)] + assert clean_opcodes(sm.get_opcodes()) == \ + clean_opcodes([('equal', 0, 10, 0, 10), + ('replace', 10, 11, 10, 14), + ('equal', 11, 14, 14, 17)]) @differs_decorator def test_one_insert(differ): sm = differ('b' * 100, 'a' + 'b' * 100) - assert list(sm.get_opcodes()) == [('insert', 0, 0, 0, 1), - ('equal', 0, 100, 1, 101)] + assert clean_opcodes(sm.get_opcodes()) == \ + clean_opcodes([('insert', 0, 0, 0, 1), + ('equal', 0, 100, 1, 101)]) sm = differ('b' * 100, 'b' * 50 + 'a' + 'b' * 50) - assert list(sm.get_opcodes()) == [('equal', 0, 50, 0, 50), - ('insert', 50, 50, 50, 51), - ('equal', 50, 100, 51, 101)] + assert clean_opcodes(sm.get_opcodes()) == \ + clean_opcodes([('equal', 0, 50, 0, 50), + ('insert', 50, 50, 50, 51), + ('equal', 50, 100, 51, 101)]) @differs_decorator def test_one_delete(differ): sm = differ('a' * 40 + 'c' + 'b' * 40, 'a' * 40 + 'b' * 40) - assert list(sm.get_opcodes()) == [('equal', 0, 40, 0, 40), - ('delete', 40, 41, 40, 40), - ('equal', 41, 81, 40, 80)] + assert clean_opcodes(sm.get_opcodes()) == \ + clean_opcodes([('equal', 0, 40, 0, 40), + ('delete', 40, 41, 40, 40), + ('equal', 41, 81, 40, 80)]) def test_ratcliffobershelp(): ref = "a b c d e f" hyp = "a b d e kfmod fgdjn idf giudfg diuf dufg idgiudgd" sm = RatcliffObershelp(ref, hyp) - assert list(sm.get_opcodes()) == [('equal', 0, 3, 0, 3), - ('delete', 3, 5, 3, 3), - ('equal', 5, 10, 3, 8), - ('insert', 10, 10, 8, 9), - ('equal', 10, 11, 9, 10), - ('insert', 11, 11, 10, 49)] + assert clean_opcodes(sm.get_opcodes()) == \ + clean_opcodes([('equal', 0, 3, 0, 3), + ('delete', 3, 5, 3, 3), + ('equal', 5, 10, 3, 8), + ('insert', 10, 10, 8, 9), + ('equal', 10, 11, 9, 10), + ('insert', 11, 11, 10, 49)]) From 20b95706bba678e6c12f30dfdbce68ccfb593fc9 Mon Sep 17 00:00:00 2001 From: MikeSmithEU Date: Sun, 8 Nov 2020 00:38:57 +0100 Subject: [PATCH 3/5] get_error_rate for WER --- setup.py | 1 + src/benchmarkstt/diff/__init__.py | 18 +++++++++ src/benchmarkstt/diff/core.py | 6 +++ src/benchmarkstt/metrics/core.py | 51 ++++++++++--------------- src/benchmarkstt/schema.py | 4 +- tests/benchmarkstt/test_diff.py | 25 ++++++------ tests/benchmarkstt/test_metrics_core.py | 4 +- 7 files changed, 62 insertions(+), 47 deletions(-) diff --git a/setup.py b/setup.py index e8c5ba16..73a2f602 100644 --- a/setup.py +++ b/setup.py @@ -62,6 +62,7 @@ def filter_requirements(line): 'jsonrpcserver>=4.0.1', 'gunicorn>=19.9.0', 'docutils>=0.14', + 'edit_distance>=1.0.4', 'editdistance>=0.5.3', 'Unidecode>=1.1.2', ], diff --git a/src/benchmarkstt/diff/__init__.py b/src/benchmarkstt/diff/__init__.py index 5c537778..cd1f22ff 100644 --- a/src/benchmarkstt/diff/__init__.py +++ b/src/benchmarkstt/diff/__init__.py @@ -64,4 +64,22 @@ def get_opcodes(self): def get_opcode_counts(self): raise NotImplementedError() + @abstractmethod + def get_error_rate(self): + raise NotImplementedError() + + +class Differ(DifferInterface, metaclass=ABCMeta): + def get_opcode_counts(self): + return get_opcode_counts(self.get_opcodes()) + + def get_error_rate(self): + counts = self.get_opcode_counts() + + changes = counts.replace + counts.delete + counts.insert + total = counts.equal + counts.replace + counts.delete + + return changes / total + + factory = CoreFactory(DifferInterface, False) diff --git a/src/benchmarkstt/diff/core.py b/src/benchmarkstt/diff/core.py index 49b99318..fe04011e 100644 --- a/src/benchmarkstt/diff/core.py +++ b/src/benchmarkstt/diff/core.py @@ -5,6 +5,7 @@ from difflib import SequenceMatcher from benchmarkstt.diff import Differ import edit_distance +import editdistance class RatcliffObershelp(Differ): @@ -55,6 +56,11 @@ def __init__(self, a, b, **kwargs): def get_opcodes(self): return self.simplify_opcodes(self._matcher.get_opcodes()) + def get_error_rate(self): + a = self._kwargs['a'] + b = self._kwargs['b'] + return editdistance.eval(a, b) / len(a) + @staticmethod def simplify_opcodes(opcodes): new_codes = [] diff --git a/src/benchmarkstt/metrics/core.py b/src/benchmarkstt/metrics/core.py index 2902ad7a..db2b1352 100644 --- a/src/benchmarkstt/metrics/core.py +++ b/src/benchmarkstt/metrics/core.py @@ -1,6 +1,6 @@ -from benchmarkstt.schema import Schema +from benchmarkstt.schema import Schema, Item import logging -from benchmarkstt.diff import Differ +from benchmarkstt.diff import Differ, factory as differ_factory from benchmarkstt.diff.core import RatcliffObershelp from benchmarkstt.diff.formatter import format_diff from benchmarkstt.metrics import Metric @@ -12,11 +12,13 @@ OpcodeCounts = namedtuple('OpcodeCounts', ('equal', 'replace', 'insert', 'delete')) +type_schema = Union[Schema, list] + def traversible(schema, key=None): if key is None: key = 'item' - return [word[key] for word in schema] + return [item if type(item) is str else item[key] for item in schema] def get_differ(a, b, differ_class: Differ): @@ -41,7 +43,7 @@ def __init__(self, differ_class: Differ = None, dialect: str = None): self._differ_class = differ_class self._dialect = dialect - def compare(self, ref: Schema, hyp: Schema): + def compare(self, ref: type_schema, hyp: type_schema): differ = get_differ(ref, hyp, differ_class=self._differ_class) a = traversible(ref) b = traversible(hyp) @@ -82,24 +84,17 @@ class WER(Metric): INS_PENALTY = 1 SUB_PENALTY = 1 - def __init__(self, mode=None, differ_class: Differ = None): + def __init__(self, mode=None, differ_class: Union[str, Differ, None] = None): self._mode = mode if differ_class is None: differ_class = RatcliffObershelp self._differ_class = differ_class + if mode == self.MODE_HUNT: self.DEL_PENALTY = self.INS_PENALTY = .5 - def compare(self, ref: Schema, hyp: Schema) -> float: - if self._mode == self.MODE_LEVENSHTEIN: - ref_list = [i['item'] for i in ref] - hyp_list = [i['item'] for i in hyp] - total_ref = len(ref_list) - if total_ref == 0: - return 1 - return editdistance.eval(ref_list, hyp_list) / total_ref - + def compare(self, ref: type_schema, hyp: type_schema) -> float: diffs = get_differ(ref, hyp, differ_class=self._differ_class) counts = diffs.get_opcode_counts() @@ -141,25 +136,21 @@ class CER(Metric): will first be split into words, ['aa','bb','cc'], and then merged into a final string for evaluation: 'aabbcc'. - :param mode: 'levenshtein' (default). - :param differ_class: For future use. + :param differ_class: see :py:mod:`benchmarkstt.Differ.core` """ - # CER modes - MODE_LEVENSHTEIN = 'levenshtein' + def __init__(self, differ_class: Union[str, Differ, None] = None): + self._differ_class = Levenshtein if differ_class is None else differ_class - def __init__(self, mode=None, differ_class=None): - self._mode = mode - - def compare(self, ref: Schema, hyp: Schema): - ref_str = ''.join([i['item'] for i in ref]) - hyp_str = ''.join([i['item'] for i in hyp]) - total_ref = len(ref_str) + def compare(self, ref: type_schema, hyp: type_schema): + ref_str = ''.join(traversible(ref)) + hyp_str = ''.join(traversible(hyp)) - if total_ref == 0: + if len(ref_str) == 0: return 0 if len(hyp_str) == 0 else 1 - return editdistance.eval(ref_str, hyp_str) / total_ref + diffs = get_differ(ref_str, hyp_str, differ_class=self._differ_class) + return diffs.get_error_rate() class DiffCounts(Metric): @@ -169,12 +160,10 @@ class DiffCounts(Metric): :param differ_class: see :py:mod:`benchmarkstt.Differ.core` """ - def __init__(self, differ_class: Differ = None): - if differ_class is None: - differ_class = RatcliffObershelp + def __init__(self, differ_class: Union[str, Differ, None] = None): self._differ_class = differ_class - def compare(self, ref: Schema, hyp: Schema) -> OpcodeCounts: + def compare(self, ref: type_schema, hyp: type_schema) -> OpcodeCounts: diffs = get_differ(ref, hyp, differ_class=self._differ_class) return diffs.get_opcode_counts() diff --git a/src/benchmarkstt/schema.py b/src/benchmarkstt/schema.py index af581b2f..7c67bfca 100644 --- a/src/benchmarkstt/schema.py +++ b/src/benchmarkstt/schema.py @@ -3,7 +3,6 @@ """ import json from collections.abc import Mapping -from typing import Union from collections import defaultdict @@ -51,6 +50,9 @@ def __iter__(self): def __repr__(self): return 'Item(%s)' % (self.json(),) + def __hash__(self): + return hash(self._val['item']) + def json(self, **kwargs): return Schema.dumps(self, **kwargs) diff --git a/tests/benchmarkstt/test_diff.py b/tests/benchmarkstt/test_diff.py index 5504eb83..fbc83151 100644 --- a/tests/benchmarkstt/test_diff.py +++ b/tests/benchmarkstt/test_diff.py @@ -25,8 +25,8 @@ def clean_opcodes(opcodes): def test_simple_levenshtein_ratcliff_similarity(): a = list('012345') b = list('023x45') - assert clean_opcodes(Levenshtein(a, b).get_opcodes()) == \ - clean_opcodes(RatcliffObershelp(a, b).get_opcodes()) + assert(clean_opcodes(Levenshtein(a, b).get_opcodes()) == + clean_opcodes(RatcliffObershelp(a, b).get_opcodes())) @differs_decorator @@ -35,42 +35,43 @@ def test_simple(differ): '0123456HIJkopq', '0123456HIJKlmnopq' ) - assert clean_opcodes(sm.get_opcodes()) == \ + assert(clean_opcodes(sm.get_opcodes()) == clean_opcodes([('equal', 0, 10, 0, 10), ('replace', 10, 11, 10, 14), - ('equal', 11, 14, 14, 17)]) + ('equal', 11, 14, 14, 17)])) @differs_decorator def test_one_insert(differ): sm = differ('b' * 100, 'a' + 'b' * 100) - assert clean_opcodes(sm.get_opcodes()) == \ + assert(clean_opcodes(sm.get_opcodes()) == clean_opcodes([('insert', 0, 0, 0, 1), - ('equal', 0, 100, 1, 101)]) + ('equal', 0, 100, 1, 101)])) + sm = differ('b' * 100, 'b' * 50 + 'a' + 'b' * 50) - assert clean_opcodes(sm.get_opcodes()) == \ + assert(clean_opcodes(sm.get_opcodes()) == clean_opcodes([('equal', 0, 50, 0, 50), ('insert', 50, 50, 50, 51), - ('equal', 50, 100, 51, 101)]) + ('equal', 50, 100, 51, 101)])) @differs_decorator def test_one_delete(differ): sm = differ('a' * 40 + 'c' + 'b' * 40, 'a' * 40 + 'b' * 40) - assert clean_opcodes(sm.get_opcodes()) == \ + assert(clean_opcodes(sm.get_opcodes()) == clean_opcodes([('equal', 0, 40, 0, 40), ('delete', 40, 41, 40, 40), - ('equal', 41, 81, 40, 80)]) + ('equal', 41, 81, 40, 80)])) def test_ratcliffobershelp(): ref = "a b c d e f" hyp = "a b d e kfmod fgdjn idf giudfg diuf dufg idgiudgd" sm = RatcliffObershelp(ref, hyp) - assert clean_opcodes(sm.get_opcodes()) == \ + assert(clean_opcodes(sm.get_opcodes()) == clean_opcodes([('equal', 0, 3, 0, 3), ('delete', 3, 5, 3, 3), ('equal', 5, 10, 3, 8), ('insert', 10, 10, 8, 9), ('equal', 10, 11, 9, 10), - ('insert', 11, 11, 10, 49)]) + ('insert', 11, 11, 10, 49)])) diff --git a/tests/benchmarkstt/test_metrics_core.py b/tests/benchmarkstt/test_metrics_core.py index 55fc7eff..711c5f9c 100644 --- a/tests/benchmarkstt/test_metrics_core.py +++ b/tests/benchmarkstt/test_metrics_core.py @@ -41,7 +41,6 @@ def test_wer(a, b, exp): assert WER(differ_class='levenshtein').compare(PlainText(a), PlainText(b)) == wer_levenshtein - @pytest.mark.parametrize('a,b,entities_list,weights,exp_beer,exp_occ', [ ['madam is here', 'adam is here', ['madam', 'here'], [100, 10], (1.0, 0.0), (1, 1)], ['theresa may is here', 'theresa may is there', ['theresa may', 'here'], [10, 100], (0.0, 1.0), (1, 1)], @@ -113,5 +112,4 @@ def test_wa_beer(a, b, entities_list, weights, exp): def test_cer(a, b, exp): cer_levenshtein, = exp - assert CER(mode=CER.MODE_LEVENSHTEIN).compare(PlainText(a), PlainText(b)) == cer_levenshtein - + assert CER(differ_class='levenshtein').compare(PlainText(a), PlainText(b)) == cer_levenshtein From ab2217f5858ad469283ff36412ef66f48415b325 Mon Sep 17 00:00:00 2001 From: MikeSmithEU Date: Sun, 8 Nov 2020 00:55:22 +0100 Subject: [PATCH 4/5] cleanup --- docs/ext/autoclassmembersdiagram.py | 1 + src/benchmarkstt/diff/__init__.py | 5 +++++ src/benchmarkstt/metrics/core.py | 27 +++++++++++++++------------ 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/docs/ext/autoclassmembersdiagram.py b/docs/ext/autoclassmembersdiagram.py index c603202c..cb975f1f 100644 --- a/docs/ext/autoclassmembersdiagram.py +++ b/docs/ext/autoclassmembersdiagram.py @@ -39,6 +39,7 @@ class MagicTraits(object): "__getattr__": "attributes", "__getattribute__": "attributes", "__len__": "len", + "__hash__": "hashable", "__subclasshook__": False, "__repr__": False, "__str__": False, diff --git a/src/benchmarkstt/diff/__init__.py b/src/benchmarkstt/diff/__init__.py index cd1f22ff..3da540a5 100644 --- a/src/benchmarkstt/diff/__init__.py +++ b/src/benchmarkstt/diff/__init__.py @@ -70,6 +70,11 @@ def get_error_rate(self): class Differ(DifferInterface, metaclass=ABCMeta): + """ + Provides pre-made (probably sub-optimal) implementations of + get_opcode_counts() and get_error_rate() + """ + def get_opcode_counts(self): return get_opcode_counts(self.get_opcodes()) diff --git a/src/benchmarkstt/metrics/core.py b/src/benchmarkstt/metrics/core.py index db2b1352..8612f0d3 100644 --- a/src/benchmarkstt/metrics/core.py +++ b/src/benchmarkstt/metrics/core.py @@ -1,7 +1,9 @@ -from benchmarkstt.schema import Schema, Item +from benchmarkstt.schema import Schema import logging -from benchmarkstt.diff import Differ, factory as differ_factory -from benchmarkstt.diff.core import RatcliffObershelp +from collections import namedtuple +from typing import Union +from benchmarkstt.diff import DifferInterface, factory as differ_factory +from benchmarkstt.diff.core import RatcliffObershelp, Levenshtein from benchmarkstt.diff.formatter import format_diff from benchmarkstt.metrics import Metric from collections import namedtuple @@ -13,6 +15,7 @@ ('equal', 'replace', 'insert', 'delete')) type_schema = Union[Schema, list] +type_differ = DifferInterface def traversible(schema, key=None): @@ -21,7 +24,7 @@ def traversible(schema, key=None): return [item if type(item) is str else item[key] for item in schema] -def get_differ(a, b, differ_class: Differ): +def get_differ(a, b, differ_class: type_differ): if differ_class is None or differ_class == '': differ_class = RatcliffObershelp elif type(differ_class) is str: @@ -33,13 +36,13 @@ class WordDiffs(Metric): """ Present differences on a per-word basis - :param differ_class: see :py:mod:`benchmarkstt.Differ.core` + :param differ_class: see :py:mod:`benchmarkstt.diff.core` :param dialect: Presentation format. Default is 'ansi'. :example differ_class: 'levenshtein' :example dialect: 'html' """ - def __init__(self, differ_class: Differ = None, dialect: str = None): + def __init__(self, differ_class: type_differ = None, dialect: str = None): self._differ_class = differ_class self._dialect = dialect @@ -73,7 +76,7 @@ class WER(Metric): See https://docs.python.org/3/library/difflib.html :param mode: 'strict' (default), 'hunt' or 'levenshtein'. - :param differ_class: see :py:mod:`benchmarkstt.Differ.core` + :param differ_class: see :py:mod:`benchmarkstt.diff.core` """ # WER modes @@ -84,7 +87,7 @@ class WER(Metric): INS_PENALTY = 1 SUB_PENALTY = 1 - def __init__(self, mode=None, differ_class: Union[str, Differ, None] = None): + def __init__(self, mode=None, differ_class: Union[str, type_differ, None] = None): self._mode = mode if differ_class is None: @@ -136,10 +139,10 @@ class CER(Metric): will first be split into words, ['aa','bb','cc'], and then merged into a final string for evaluation: 'aabbcc'. - :param differ_class: see :py:mod:`benchmarkstt.Differ.core` + :param differ_class: see :py:mod:`benchmarkstt.diff.core` """ - def __init__(self, differ_class: Union[str, Differ, None] = None): + def __init__(self, differ_class: Union[str, type_differ, None] = None): self._differ_class = Levenshtein if differ_class is None else differ_class def compare(self, ref: type_schema, hyp: type_schema): @@ -157,10 +160,10 @@ class DiffCounts(Metric): """ Get the amount of differences between reference and hypothesis - :param differ_class: see :py:mod:`benchmarkstt.Differ.core` + :param differ_class: see :py:mod:`benchmarkstt.diff.core` """ - def __init__(self, differ_class: Union[str, Differ, None] = None): + def __init__(self, differ_class: Union[str, type_differ, None] = None): self._differ_class = differ_class def compare(self, ref: type_schema, hyp: type_schema) -> OpcodeCounts: From c23c4a4839035b4a1d67691190a302ad93bc2cd1 Mon Sep 17 00:00:00 2001 From: MikeSmithEU Date: Tue, 12 Jan 2021 13:18:21 +0100 Subject: [PATCH 5/5] Don't support opcodes for levenshtein (missing or incorrectly implemented in all tested libraries) --- src/benchmarkstt/diff/core.py | 7 +++++-- src/benchmarkstt/metrics/core.py | 23 +++++++++++++---------- tests/benchmarkstt/test_diff.py | 10 +++++----- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/src/benchmarkstt/diff/core.py b/src/benchmarkstt/diff/core.py index fe04011e..17e98021 100644 --- a/src/benchmarkstt/diff/core.py +++ b/src/benchmarkstt/diff/core.py @@ -54,12 +54,15 @@ def __init__(self, a, b, **kwargs): self._matcher = edit_distance.SequenceMatcher(**self._kwargs) def get_opcodes(self): - return self.simplify_opcodes(self._matcher.get_opcodes()) + raise NotImplementedError("not supported by %r" % (self,)) def get_error_rate(self): a = self._kwargs['a'] b = self._kwargs['b'] - return editdistance.eval(a, b) / len(a) + len_a = len(a) + if len_a == 0: + return 0 if len(b) == 0 else 1 + return editdistance.eval(a, b) / len_a @staticmethod def simplify_opcodes(opcodes): diff --git a/src/benchmarkstt/metrics/core.py b/src/benchmarkstt/metrics/core.py index 8612f0d3..3d55ce0c 100644 --- a/src/benchmarkstt/metrics/core.py +++ b/src/benchmarkstt/metrics/core.py @@ -100,16 +100,19 @@ def __init__(self, mode=None, differ_class: Union[str, type_differ, None] = None def compare(self, ref: type_schema, hyp: type_schema) -> float: diffs = get_differ(ref, hyp, differ_class=self._differ_class) - counts = diffs.get_opcode_counts() - - changes = counts.replace * self.SUB_PENALTY + \ - counts.delete * self.DEL_PENALTY + \ - counts.insert * self.INS_PENALTY - - total = counts.equal + counts.replace + counts.delete - if total == 0: - return 1 if changes else 0 - return changes / total + try: + counts = diffs.get_opcode_counts() + + changes = counts.replace * self.SUB_PENALTY + \ + counts.delete * self.DEL_PENALTY + \ + counts.insert * self.INS_PENALTY + + total = counts.equal + counts.replace + counts.delete + if total == 0: + return 1 if changes else 0 + return changes / total + except NotImplementedError: + return diffs.get_error_rate() class CER(Metric): diff --git a/tests/benchmarkstt/test_diff.py b/tests/benchmarkstt/test_diff.py index fbc83151..4999b07e 100644 --- a/tests/benchmarkstt/test_diff.py +++ b/tests/benchmarkstt/test_diff.py @@ -22,11 +22,11 @@ def clean_opcodes(opcodes): return list(map(clean_opcode, opcodes)) -def test_simple_levenshtein_ratcliff_similarity(): - a = list('012345') - b = list('023x45') - assert(clean_opcodes(Levenshtein(a, b).get_opcodes()) == - clean_opcodes(RatcliffObershelp(a, b).get_opcodes())) +# def test_simple_levenshtein_ratcliff_similarity(): +# a = list('012345') +# b = list('023x45') +# assert(clean_opcodes(Levenshtein(a, b).get_opcodes()) == +# clean_opcodes(RatcliffObershelp(a, b).get_opcodes())) @differs_decorator