From 2e99d35f9e2931260552f27549f01169421ab8fd Mon Sep 17 00:00:00 2001 From: Gabriele Sarti Date: Thu, 13 Apr 2023 17:07:34 +0200 Subject: [PATCH 01/23] Added Granular Tagger template --- divemt/qe_taggers.py | 84 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/divemt/qe_taggers.py b/divemt/qe_taggers.py index 8576663..9f91f6d 100644 --- a/divemt/qe_taggers.py +++ b/divemt/qe_taggers.py @@ -400,3 +400,87 @@ def generate_tags( ) clear_nlp_cache() return src_tags, mt_tags + + +class NameTBDTagger(QETagger): + + def __init__( + self, + aligner: Optional[SentenceAligner] = None, + ): + self.aligner = aligner if aligner else SentenceAligner(model="bert", token_type="bpe", matching_methods="mai") + + def align_source_mt( + self, + src_tokens: List[List[str]], + mt_tokens: List[List[str]], + src_langs: List[str], + mt_langs: List[str], + ) -> List[List[Tuple[int, int]]]: + return [ + self.aligner.get_word_aligns(src_tok, mt_tok)["inter"] + for src_tok, mt_tok in tqdm( + zip(src_tokens, mt_tokens), total=len(src_tokens), desc="Aligning src-mt" + ) + ] + + def align_mt_pe( + self, + mt_tokens: List[List[str]], + pe_tokens: List[List[str]], + langs: List[str], + ) -> List[Tuple[int, int]]: + return [ + self.aligner.get_word_aligns(mt_tok, pe_tok)["inter"] + for mt_tok, pe_tok in tqdm( + zip(mt_tokens, pe_tokens), total=len(mt_tokens), desc="Aligning mt-pe" + ) + ] + + @staticmethod + def tags_from_edits( + mt_tokens: List[List[str]], + pe_tokens: List[List[str]], + alignments: List[List[Tuple[int, int]]], + ) -> List[List[str]]: + """ Produce tags on MT tokens from edits found in the PE tokens. 
""" + # 1:1 match: OK if same, SUB if different + # 1:n match: + # - Find highest match for 1 in n (lexical, LaBSE if not found) + # - If all matches are < threshold, tag as EXP (expansion) + # - Else, assign OK if same, SUB if different + # - If match preceded by some of the n, assign also INS to match + # - If match followed by some of the n, push an INS tag to the next token + # n:1 match: + # - Find highest match for 1 in n (lexical, LaBSE if not found) + # - If all matches are < threshold, tag as CON (contraction) + # - Else, assign OK if same, SUB if different + # - All n different than match are assigned DEL + # n:m match: + # - For each 1 in n, find highest match for 1 in m (lexical, LaBSE if not found, from highest score to lowest) + # - If all matches are < threshold, skip and continue + # - Else assign OK if same, SUB if different, remove from available m matches + # If in a block with multiple crossing alignments (with blocks named A, B, ...): + # - Swapped pair A, B -> B, A: Both blocks recive SHF + # - For n > 2, all blocks changing relative position recive SHF, others don't + raise NotImplementedError() + + @staticmethod + def tags_to_source( + src_tokens: List[List[str]], + mt_tokens: List[List[str]], + alignments: List[List[Tuple[int, int]]], + mt_tags: List[List[str]], + ) -> List[List[str]]: + """ Propagate tags from MT to source. 
""" + # 1:1 match: copy tags from MT + # 1:n match: + # - Find highest match for 1 in n (lexical, LaBSE if not found) + # - If all matches are < threshold, TBD + # - Else, copy tags from top match in MT and ignore other matches + # n:1 match: copy tags from 1 to all n + # n:m match: + # - For each 1 in n, find highest match for 1 in m (lexical, LaBSE if not found) + # - If all matches are < threshold, ignore and continue + # - Copy tags from top match in MT and ignore other matches + raise NotImplementedError() From 6a9a04aa815e47a099cda1ec072009a632df21fd Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Thu, 27 Apr 2023 21:35:07 +0200 Subject: [PATCH 02/23] feat: tags from edits /w tests --- divemt/qe_taggers.py | 209 ++++++++++++++++++++++- pyproject.toml | 1 + tests/test_qe_taggers_name_tbd_tagger.py | 190 +++++++++++++++++++++ 3 files changed, 391 insertions(+), 9 deletions(-) create mode 100644 tests/test_qe_taggers_name_tbd_tagger.py diff --git a/divemt/qe_taggers.py b/divemt/qe_taggers.py index 9f91f6d..a3b8d0f 100644 --- a/divemt/qe_taggers.py +++ b/divemt/qe_taggers.py @@ -3,13 +3,16 @@ import subprocess from abc import ABC, abstractmethod from collections import defaultdict +from itertools import groupby from pathlib import Path -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Union, Set, Generator from xml.sax.saxutils import escape +import numpy as np from simalign import SentenceAligner from strenum import StrEnum from tqdm import tqdm +import Levenshtein as lev from .parse_utils import clear_nlp_cache, tokenize from .wmt22qe_utils import align_sentence_tercom, parse_tercom_xml_file @@ -402,8 +405,23 @@ def generate_tags( return src_tags, mt_tags +class NameTBDGeneralTags(StrEnum): + OK = 'OK' + + BAD_SUBSTITUTION = 'BAD-SUB' + BAD_DELETION_RIGHT = 'BAD-DEL-R' # smth deleted on the right side of this token + BAD_DELETION_LEFT = 'BAD-DEL-L' # smth deleted on the left side of this token + BAD_INSERTION 
= 'BAD-INS' # 1:n + BAD_SHIFTING = 'BAD-SHF' # change words order n:m with hight threshold + + BAD_CONTRACTION = 'BAD-CON' # 1:n + BAD_EXPANSION = 'BAD-EXP' + + class NameTBDTagger(QETagger): + ID = "tbd_qe" + def __init__( self, aligner: Optional[SentenceAligner] = None, @@ -429,7 +447,7 @@ def align_mt_pe( mt_tokens: List[List[str]], pe_tokens: List[List[str]], langs: List[str], - ) -> List[Tuple[int, int]]: + ) -> List[List[Tuple[int, int]]]: return [ self.aligner.get_word_aligns(mt_tok, pe_tok)["inter"] for mt_tok, pe_tok in tqdm( @@ -437,13 +455,64 @@ def align_mt_pe( ) ] + @staticmethod + def _group_by_node(alignments: List[Tuple[Optional[int], Optional[int]]], by_start_node: bool = True, sort: bool = False) -> Generator[Tuple[int, List[int]], None, None]: + """Yield a node id and a list of connected nodes.""" + _by_index = 0 if by_start_node else 1 + if sort: + alignments = sorted(alignments, key=lambda x: x[_by_index] if x[_by_index] is not None else -1) + for start_node, connected_alignments in groupby(alignments, lambda x: x[_by_index]): + yield start_node, [end_id if by_start_node else start_id for start_id, end_id in connected_alignments] + + @staticmethod + def _detect_crossing_edges(mt_tokens: List[str], pe_tokens: List[str], alignments: List[Tuple[Optional[int], Optional[int]]]) -> List[bool]: + """Detect crossing edges in the alignments. 
Return List of clusters of nodes that are connected.""" + # TODO: optimize from n^2 to n as 2 pointers + shifted_mt_mask = [False] * len(mt_tokens) + + for i in range(len(alignments)): + for j in range(i + 1, len(alignments)): + edge_1, edge_2 = alignments[i], alignments[j] + + # skip if one of the edges is None + if edge_1[0] is None or edge_1[1] is None or edge_2[0] is None or edge_2[1] is None: + continue + + # skip if starting same node + if edge_1[0] == edge_2[0]: + continue + + assert edge_1[0] < edge_2[0], "Alignments have to be are sorted by mt" + + # Check if crossing edges + if edge_1[0] < edge_2[0] and edge_1[1] > edge_2[1]: + # mark the mt token as shifted + shifted_mt_mask[edge_1[0]] = True + shifted_mt_mask[edge_2[0]] = True + + return shifted_mt_mask + + @staticmethod + def _lev_similarity(mt_tok: str, pe_tok: str) -> float: + """Calculate Lev similarity between two tokens in [0, 1] range.""" + if mt_tok == pe_tok: + return 1.0 + + # calculate similarity using Lev distance + return lev.ratio(mt_tok, pe_tok) + @staticmethod def tags_from_edits( mt_tokens: List[List[str]], pe_tokens: List[List[str]], - alignments: List[List[Tuple[int, int]]], - ) -> List[List[str]]: + mt_pe_alignments: List[List[Tuple[int, int]]], + mt_tokens_embeddings: Optional[List[List[np.ndarray]]] = None, + pe_tokens_embeddings: Optional[List[List[np.ndarray]]] = None, + threshold: float = 0.5, + ) -> List[List[Set[str]]]: """ Produce tags on MT tokens from edits found in the PE tokens. """ + # TODO: check. 
now - if embeddings are not provided, use Lev distance + # TODO: update docs with ERRORS approach rather than EDITS # 1:1 match: OK if same, SUB if different # 1:n match: # - Find highest match for 1 in n (lexical, LaBSE if not found) @@ -461,16 +530,118 @@ def tags_from_edits( # - If all matches are < threshold, skip and continue # - Else assign OK if same, SUB if different, remove from available m matches # If in a block with multiple crossing alignments (with blocks named A, B, ...): - # - Swapped pair A, B -> B, A: Both blocks recive SHF - # - For n > 2, all blocks changing relative position recive SHF, others don't - raise NotImplementedError() + # - Swapped pair A, B -> B, A: Both blocks receive SHF + # - For n > 2, all blocks changing relative position receive SHF, others don't + + mt_tags = [] + for mt_tok, pe_tok, mt_pe_align in tqdm(zip(mt_tokens, pe_tokens, mt_pe_alignments), desc="Tagging MT", total=len(mt_tokens)): + + mt_sent_tags: List[Set[str]] = [set() for _ in range(len(mt_tok))] + + # clear 1-n and n-1 nodes with low threshold + # e.g. 
if 1-n or n-1 have same token or high similarity, remove low similarity as deletions/insertions + aligns_remove_1_to_n, aligns_remove_n_to_1 = set(), set() + # 1-n match + for mt_node_id, connected_pe_nodes_ids in NameTBDTagger._group_by_node(mt_pe_align, by_start_node=True, sort=False): + if mt_node_id is not None and len(connected_pe_nodes_ids) > 1: + pe_similarity = [ + (pe_node_id, NameTBDTagger._lev_similarity(mt_tok[mt_node_id], pe_tok[pe_node_id])) + for pe_node_id in connected_pe_nodes_ids + if pe_node_id is not None + ] + if all(sim < threshold for _, sim in pe_similarity): + continue + if all(sim > threshold for _, sim in pe_similarity): + continue + aligns_remove_1_to_n.update([ + (mt_node_id, pe_node_id) + for pe_node_id, sim in pe_similarity + if sim < threshold + ]) + # remove selected aligns and add None connected nodes instead + mt_pe_align = [(None, align[1]) if align in aligns_remove_1_to_n else align for align in mt_pe_align] + # n-1 match + for pe_node_id, connected_mt_nodes_ids in NameTBDTagger._group_by_node(mt_pe_align, by_start_node=False, sort=True): + if pe_node_id is not None and len(connected_mt_nodes_ids) > 1: + mt_similarity = [ + (mt_node_id, NameTBDTagger._lev_similarity(mt_tok[mt_node_id], pe_tok[pe_node_id])) + for mt_node_id in connected_mt_nodes_ids + if mt_node_id is not None + ] + if all(sim < threshold for _, sim in mt_similarity): + continue + if all(sim > threshold for _, sim in mt_similarity): + continue + aligns_remove_n_to_1.update([ + (mt_node_id, pe_node_id) + for mt_node_id, sim in mt_similarity + if sim < threshold + ]) + # remove selected aligns and add None connected nodes instead + mt_pe_align = [(align[0], None) if align in aligns_remove_n_to_1 else align for align in mt_pe_align] + + # Solve all n-1: setup expansions tags and solve n-1 matches < threshold as smth+insertion + # TODO: check with threshold, now doing without threshold + for pe_node_id, connected_mt_nodes_ids in 
NameTBDTagger._group_by_node(mt_pe_align, by_start_node=False, sort=True): + if pe_node_id is not None and len(connected_mt_nodes_ids) > 1: + # expansion, mark related mt nodes + for mt_node_id in connected_mt_nodes_ids: + if mt_node_id is not None: + mt_sent_tags[mt_node_id].add(NameTBDGeneralTags.BAD_EXPANSION.value) + + # Solve al deletions, add deletion tags on left and right sides + mt_position = 0 + for mt_node_id, connected_pe_nodes_ids in NameTBDTagger._group_by_node(mt_pe_align, by_start_node=True, sort=False): + if mt_node_id is None: + # deleted word error, mark left and right modes + if 0 <= mt_position - 1 < len(mt_sent_tags): + mt_sent_tags[mt_position - 1].add(NameTBDGeneralTags.BAD_DELETION_RIGHT.value) + if mt_position < len(mt_sent_tags): + mt_sent_tags[mt_position].add(NameTBDGeneralTags.BAD_DELETION_LEFT.value) + else: + mt_position += 1 + # clear all (None, i) to not mess grouping + mt_pe_align = [align for align in mt_pe_align if align[0] is not None] + + # Solve all 1-n matches + for mt_node_id, connected_pe_nodes_ids in NameTBDTagger._group_by_node(mt_pe_align, by_start_node=True, sort=True): + print(mt_node_id, ' -> ', connected_pe_nodes_ids, '\t\tmt_position=', mt_position) + assert mt_node_id is not None, "Already should be filtered all (None, smth) cases" + if NameTBDGeneralTags.BAD_EXPANSION.value in mt_sent_tags[mt_node_id]: + continue # TODO: check with gabrielle the priority for EXPANSION and CONTRACTION + if len(connected_pe_nodes_ids) > 1: + # contraction, mark the node + mt_sent_tags[mt_node_id].add(NameTBDGeneralTags.BAD_CONTRACTION.value) + elif connected_pe_nodes_ids[0] is None: + # insertion, mark the node + mt_sent_tags[mt_node_id].add(NameTBDGeneralTags.BAD_INSERTION.value) + elif mt_tok[mt_node_id] != pe_tok[connected_pe_nodes_ids[0]]: + # substitution, mark the node + mt_sent_tags[mt_node_id].add(NameTBDGeneralTags.BAD_SUBSTITUTION.value) + else: + # OK, mark the node + 
mt_sent_tags[mt_node_id].add(NameTBDGeneralTags.OK.value) + + # Add shifted tags if so + for mt_node_id, mask in enumerate(NameTBDTagger._detect_crossing_edges(mt_tok, pe_tok, mt_pe_align)): + if mask: + mt_sent_tags[mt_node_id].add(NameTBDGeneralTags.BAD_SHIFTING.value) + + # Save tags for this sentence + mt_tags.append(mt_sent_tags) + + # Basic sanity check + assert all( + [len(mt_sent_tokens) == len(mt_sent_tags) for mt_sent_tokens, mt_sent_tags in zip(mt_tokens, mt_tags)] + ), "MT tags creation failed, number of tokens and tags do not match" + return mt_tags @staticmethod def tags_to_source( src_tokens: List[List[str]], mt_tokens: List[List[str]], - alignments: List[List[Tuple[int, int]]], - mt_tags: List[List[str]], + src_mt_alignments: List[List[Tuple[int, int]]], + mt_tags: List[List[Set[str]]], ) -> List[List[str]]: """ Propagate tags from MT to source. """ # 1:1 match: copy tags from MT @@ -484,3 +655,23 @@ def tags_to_source( # - If all matches are < threshold, ignore and continue # - Copy tags from top match in MT and ignore other matches raise NotImplementedError() + + def generate_tags( + self, + srcs: List[str], + mts: List[str], + pes: List[str], + src_langs: Union[str, List[Set[str]]], + tgt_langs: Union[str, List[Set[str]]], + ) -> Tuple[List[str], List[str]]: + src_tokens, src_langs = self.get_tokenized(srcs, src_langs) + mt_tokens, tgt_langs = self.get_tokenized(mts, tgt_langs) + pe_tokens, _ = self.get_tokenized(pes, tgt_langs) + src_mt_alignments = self.align_source_mt(src_tokens, mt_tokens, src_langs, tgt_langs) + mt_pe_alignments = self.align_mt_pe(mt_tokens, pe_tokens, tgt_langs) + mt_tags = self.tags_from_edits(mt_tokens, pe_tokens, mt_pe_alignments) + src_tags = self.tags_to_source( + src_tokens, pe_tokens, src_mt_alignments, mt_tags + ) + clear_nlp_cache() + return src_tags, mt_tags diff --git a/pyproject.toml b/pyproject.toml index 1b8a76c..e8b0f4a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ dependencies = [ 
"simalign", "strenum", "sentencepiece", + "sentence-transformers", # for LaBSE "tqdm", "black", "flake8", diff --git a/tests/test_qe_taggers_name_tbd_tagger.py b/tests/test_qe_taggers_name_tbd_tagger.py new file mode 100644 index 0000000..ea13df6 --- /dev/null +++ b/tests/test_qe_taggers_name_tbd_tagger.py @@ -0,0 +1,190 @@ +from typing import List, Tuple, Set + +import pytest +from strenum import StrEnum + +from divemt.qe_taggers import NameTBDTagger +from divemt.qe_taggers import NameTBDGeneralTags as Tags + + +class TestUtils: + @pytest.mark.parametrize("mt_len, mt_pe_alignments, true_mt_shifts_mask", [ + (1, [(0, 0)], [False]), + (2, [(0, 0), (1, 1)], [False, False]), + (3, [(0, 0), (1, 1), (2, 2)], [False, False, False]), + (3, [(0, 0), (1, None), (2, 1)], [False, False, False]), + # easiest case + (2, [(0, 1), (1, 0)], [True, True]), + # central one is not moved, but have crossing edges + (3, [(0, 2), (1, 1), (2, 0)], [True, True, True]), + # the central one deleted, so not shifted, no crossing edges + (3, [(0, 1), (1, None), (2, 0)], [True, False, True]), + # TODO: check with gabrielle + (4, [(0, 0), (1, 3), (1, 4), (1, 5), (2, 2), (2, 0), (3, None)], [False, True, True, False]), + ]) + def test_detect_crossing_edges(self, mt_len: int, mt_pe_alignments: List[Tuple[int, int]], true_mt_shifts_mask: List[bool]) -> None: + tagger = NameTBDTagger() + mt_shifts_mask = tagger._detect_crossing_edges([str(i) for i in range(mt_len)], [str(i) for i in range(mt_len)], mt_pe_alignments) + assert mt_shifts_mask == true_mt_shifts_mask + + +class TestTagsFromEdits: + @pytest.mark.parametrize("mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ + (["A", "B"], ["A", "B"], [(0, 0), (1, 1)], [{Tags.OK}, {Tags.OK}]), + (["A", "B", "C", "D"], ["A", "B", "C", "D"], [(0, 0), (1, 1), (2, 2), (3, 3)], [{Tags.OK}, {Tags.OK}, {Tags.OK}, {Tags.OK}]), + ([], [], [], []), + ]) + def test_single_error_ok( + self, + mt_tokens: List[str], + pe_tokens: List[str], + mt_pe_alignments: 
List[Tuple[int, int]], + true_mt_tags: List[Set[StrEnum]], + ) -> None: + tagger = NameTBDTagger() + predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] + assert len(predicted_tags) == len(true_mt_tags) + for predicted_tags, true_tags in zip(predicted_tags, true_mt_tags): + assert predicted_tags == {t.value for t in true_tags} + + @pytest.mark.parametrize("mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ + (["A", "B", "C"], ["A", "X", "Z"], [(0, 0), (1, 1), (2, 2)], [{Tags.OK}, {Tags.BAD_SUBSTITUTION}, {Tags.BAD_SUBSTITUTION}]), + (["A", "B"], ["Z", "X"], [(0, 0), (1, 1)], [{Tags.BAD_SUBSTITUTION}, {Tags.BAD_SUBSTITUTION}]), + # For 1-n and n-1 cases see contraction and expansion tests + ]) + def test_single_error_substitution( + self, + mt_tokens: List[str], + pe_tokens: List[str], + mt_pe_alignments: List[Tuple[int, int]], + true_mt_tags: List[Set[StrEnum]], + ) -> None: + tagger = NameTBDTagger() + predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] + assert len(predicted_tags) == len(true_mt_tags) + for predicted_tags, true_tags in zip(predicted_tags, true_mt_tags): + assert predicted_tags == {t.value for t in true_tags} + + @pytest.mark.parametrize("mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ + (["A", "B"], ["A"], [(0, 0), (1, None)], [{Tags.OK}, {Tags.BAD_INSERTION}]), + (["A", "B"], ["B"], [(0, None), (1, 0)], [{Tags.BAD_INSERTION}, {Tags.OK}]), + (["A", "B"], [], [(0, None), (1, None)], [{Tags.BAD_INSERTION}, {Tags.BAD_INSERTION}]), + # For 1-n and n-1 cases see contraction and expansion tests + ]) + def test_single_error_insertion( + self, + mt_tokens: List[str], + pe_tokens: List[str], + mt_pe_alignments: List[Tuple[int, int]], + true_mt_tags: List[Set[StrEnum]], + ) -> None: + tagger = NameTBDTagger() + predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] + assert len(predicted_tags) == len(true_mt_tags) + for predicted_tags, 
true_tags in zip(predicted_tags, true_mt_tags): + assert predicted_tags == {t.value for t in true_tags} + + @pytest.mark.parametrize("mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ + (["A"], ["A", "X"], [(0, 0), (None, 1)], [{Tags.OK, Tags.BAD_DELETION_RIGHT}]), + (["A"], ["X", "A"], [(None, 0), (0, 1)], [{Tags.OK, Tags.BAD_DELETION_LEFT}]), + (["A", "B"], ["A", "X", "B"], [(0, 0), (None, 1), (1, 2)], [{Tags.OK, Tags.BAD_DELETION_RIGHT}, {Tags.OK, Tags.BAD_DELETION_LEFT}]), + # Delete multiple tokens, but tag error as deleted one + (["A"], ["A", "X", "Y", "Z"], [(0, 0), (None, 1), (None, 2), (None, 3)], [{Tags.OK, Tags.BAD_DELETION_RIGHT}]), + (["A"], ["X", "Y", "Z", "A"], [(None, 0), (None, 1), (None, 2), (0, 3)], [{Tags.OK, Tags.BAD_DELETION_LEFT}]), + (["A", "B"], ["A", "X", "Y", "Z", "B"], [(0, 0), (None, 1), (None, 2), (None, 3), (1, 4)], [{Tags.OK, Tags.BAD_DELETION_RIGHT}, {Tags.OK, Tags.BAD_DELETION_LEFT}]), + # deleted both left and right sides + (["A"], ["X", "A", "Y"], [(None, 0), (0, 1), (None, 2)], [{Tags.OK, Tags.BAD_DELETION_LEFT, Tags.BAD_DELETION_RIGHT}]), + # deleted for empty target + ([], ["X"], [(None, 0)], []), + ]) + def test_single_error_deletion( + self, + mt_tokens: List[str], + pe_tokens: List[str], + mt_pe_alignments: List[Tuple[int, int]], + true_mt_tags: List[Set[StrEnum]], + ) -> None: + tagger = NameTBDTagger() + predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] + assert len(predicted_tags) == len(true_mt_tags) + for predicted_tags, true_tags in zip(predicted_tags, true_mt_tags): + assert predicted_tags == {t.value for t in true_tags} + + @pytest.mark.parametrize("mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ + # Have same BBB token, so should filter CCC and TTT out as Deletion error and BBB as Ok + (["AAA", "BBB"], ["AAA", "BBB", "CCC", "TTT"], [(0, 0), (1, 1), (1, 2), (1, 3)], [{Tags.OK}, {Tags.OK, Tags.BAD_DELETION_RIGHT}]), + (["AAA", "BBB"], ["AAA", "TTT", "BBB", 
"CCC"], [(0, 0), (1, 1), (1, 2), (1, 3)], [{Tags.OK, Tags.BAD_DELETION_RIGHT}, {Tags.OK, Tags.BAD_DELETION_RIGHT, Tags.BAD_DELETION_LEFT}]), + # XXX, TTT and CCC >threshold are same BBB token, so its bad Contradiction + (["AAA", "BBB"], ["AAA", "XXX", "CCC", "TTT"], [(0, 0), (1, 1), (1, 2), (1, 3)], [{Tags.OK}, {Tags.BAD_CONTRACTION}]), + # BBX is >threshold, CCC/TTT threshold, so all are Contractions + (["AAA", "BBB"], ["AAA", "BBX", "XBB"], [(0, 0), (1, 1), (1, 2)], [{Tags.OK}, {Tags.BAD_CONTRACTION}]), + # BBX and XBB >threshold while TTT is None: + tagger = NameTBDTagger() + predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] + assert len(predicted_tags) == len(true_mt_tags) + for predicted_tags, true_tags in zip(predicted_tags, true_mt_tags): + assert predicted_tags == {t.value for t in true_tags} + + @pytest.mark.parametrize("mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ + # BB token is same, so CCC and TTT are insertions + (["AAA", "BBB", "CCC", "TTT"], ["AAA", "BBB"], [(0, 0), (1, 1), (2, 1), (3, 1)], [{Tags.OK}, {Tags.OK}, {Tags.BAD_INSERTION}, {Tags.BAD_INSERTION}]), + (["AAA", "TTT", "BBB", "CCC"], ["AAA", "BBB"], [(0, 0), (1, 1), (2, 1), (3, 1)], [{Tags.OK}, {Tags.BAD_INSERTION}, {Tags.OK}, {Tags.BAD_INSERTION}]), + # XXX, TTT and CCC >threshold are same BBB token, so its bad Expansion + (["AAA", "XXX", "CCC", "TTT"], ["AAA", "BBB"], [(0, 0), (1, 1), (2, 1), (3, 1)], [{Tags.OK}, {Tags.BAD_EXPANSION}, {Tags.BAD_EXPANSION}, {Tags.BAD_EXPANSION}]), + # BBX is >threshold, CCC/TTT threshold, so all are Expansion + (["AAA", "BBX", "XBB"], ["AAA", "BBB"], [(0, 0), (1, 1), (2, 1)], [{Tags.OK}, {Tags.BAD_EXPANSION}, {Tags.BAD_EXPANSION}]), + # BBX and XBB >threshold while TTT is None: + tagger = NameTBDTagger() + predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] + assert len(predicted_tags) == len(true_mt_tags) + for predicted_tags, true_tags in zip(predicted_tags, 
true_mt_tags): + assert predicted_tags == {t.value for t in true_tags} + + @pytest.mark.parametrize("mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ + # simple case + (["A", "B"], ["B", "A"], [(0, 1), (1, 0)], [{Tags.OK, Tags.BAD_SHIFTING}, {Tags.OK, Tags.BAD_SHIFTING}]), + # middle intact, but crossing edges, so shifted + (["A", "X", "Y", "B"], ["B", "X", "Y", "A"], [(0, 3), (1, 1), (2, 2), (3, 0)], [{Tags.OK, Tags.BAD_SHIFTING}, {Tags.OK, Tags.BAD_SHIFTING}, {Tags.OK, Tags.BAD_SHIFTING}, {Tags.OK, Tags.BAD_SHIFTING}]), + # node inserted, so should not be marked as shifted TODO: check with gabrielle + (["A", "X", "B"], ["B", "A"], [(0, 1), (1, None), (2, 0)], [{Tags.OK, Tags.BAD_SHIFTING}, {Tags.BAD_INSERTION}, {Tags.OK, Tags.BAD_SHIFTING}]), + # node deleted, nothing to mark as shifted + (["A", "B"], ["B", "X", "A"], [(0, 2), (None, 1), (1, 0)], [{Tags.OK, Tags.BAD_SHIFTING, Tags.BAD_DELETION_RIGHT}, {Tags.OK, Tags.BAD_SHIFTING, Tags.BAD_DELETION_LEFT}]), + ]) + def test_single_error_shifted( + self, + mt_tokens: List[str], + pe_tokens: List[str], + mt_pe_alignments: List[Tuple[int, int]], + true_mt_tags: List[Set[StrEnum]], + ) -> None: + tagger = NameTBDTagger() + predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] + assert len(predicted_tags) == len(true_mt_tags) + for predicted_tags, true_tags in zip(predicted_tags, true_mt_tags): + assert predicted_tags == {t.value for t in true_tags} From 9b49b467aa31f63b38ce7e9ddb0826a945fe922a Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Mon, 1 May 2023 13:14:38 +0200 Subject: [PATCH 03/23] style: update tags_from_edits docs and some style fixes for typings --- divemt/qe_taggers.py | 142 ++++++++++++++++++++++++------------------- 1 file changed, 79 insertions(+), 63 deletions(-) diff --git a/divemt/qe_taggers.py b/divemt/qe_taggers.py index a3b8d0f..4ea2c94 100644 --- a/divemt/qe_taggers.py +++ b/divemt/qe_taggers.py @@ -5,7 +5,7 @@ from collections import 
defaultdict from itertools import groupby from pathlib import Path -from typing import List, Optional, Tuple, Union, Set, Generator +from typing import List, Optional, Tuple, Union, Set, Generator, Any from xml.sax.saxutils import escape import numpy as np @@ -20,6 +20,9 @@ logger = logging.getLogger(__name__) +TTag = Union[str, Set[str]] + + class QETagger(ABC): """An abstract class to produce quality estimation tags from src-mt-pe triplets.""" @@ -29,7 +32,7 @@ def align_source_mt( self, src_tokens: List[List[str]], mt_tokens: List[List[str]], - **align_source_mt_kwargs, + **align_source_mt_kwargs: Any, ) -> List[List[Tuple[int, int]]]: """Align source and machine translation tokens.""" raise NotImplementedError(f"{self.__class__.__name__} does not implement align_source_mt()") @@ -38,7 +41,7 @@ def align_source_pe( self, src_tokens: List[List[str]], pe_tokens: List[List[str]], - **align_source_pe_kwargs, + **align_source_pe_kwargs: Any, ) -> List[List[Tuple[int, int]]]: """Align source and post-edited tokens.""" raise NotImplementedError(f"{self.__class__.__name__} does not implement align_source_pe()") @@ -48,7 +51,7 @@ def align_mt_pe( self, mt_tokens: List[List[str]], pe_tokens: List[List[str]], - **align_mt_pe_kwargs, + **align_mt_pe_kwargs: Any, ) -> List[List[Tuple[int, int]]]: """Align machine translation and post-editing tokens.""" pass @@ -59,8 +62,8 @@ def tags_from_edits( mt_tokens: List[List[str]], pe_tokens: List[List[str]], alignments: List[List[Tuple[int, int]]], - **mt_tagging_kwargs, - ) -> List[List[str]]: + **mt_tagging_kwargs: Any, + ) -> List[List[TTag]]: """Produce tags on MT tokens from edits found in the PE tokens.""" pass @@ -69,8 +72,8 @@ def tags_from_edits( def tags_to_source( src_tokens: List[List[str]], tgt_tokens: List[List[str]], - **src_tagging_kwargs, - ) -> List[List[str]]: + **src_tagging_kwargs: Any, + ) -> List[List[TTag]]: """Propagate tags from MT to source.""" pass @@ -93,7 +96,7 @@ def generate_tags( pes: List[str], 
src_langs: Union[str, List[str]], tgt_langs: Union[str, List[str]], - ) -> Tuple[List[str], List[str]]: + ) -> Tuple[List[TTag], List[TTag]]: """Generate word-level quality estimation tags from source-mt-pe triplets. Args: @@ -111,7 +114,7 @@ def generate_tags( (one per machine translation). Returns: - `Tuple[List[str], List[str]]`: A tuple containing the lists of quality tags for all source and the machine + `Tuple[List[TTag], List[TTag]]`: A tuple containing the lists of quality tags for all source and the machine translation sentence, respectively. """ pass @@ -230,7 +233,7 @@ def tags_from_edits( alignments: List[List[Tuple[int, int]]], use_gaps: bool = False, omissions: str = OmissionRule.RIGHT.value, - ) -> List[List[str]]: + ) -> List[List[TTag]]: """Produce tags on MT tokens from edits found in the PE tokens.""" if use_gaps: omissions = OmissionRule.NONE.value @@ -324,7 +327,7 @@ def tags_to_source( src_pe_alignments: List[List[Tuple[int, int]]], mt_pe_alignments: List[List[Tuple[int, int]]], fluency_rule: str = FluencyRule.NORMAL.value, - ) -> List[List[str]]: + ) -> List[List[TTag]]: """Propagate tags from MT to source.""" # Reorganize source-target alignments as a dict pe2source = [] @@ -386,7 +389,7 @@ def generate_tags( use_gaps: bool = False, omissions: str = OmissionRule.RIGHT.value, fluency_rule: str = FluencyRule.NORMAL.value, - ) -> Tuple[List[List[str]], List[List[str]]]: + ) -> Tuple[List[List[TTag]], List[List[TTag]]]: src_tokens, src_langs = self.get_tokenized(srcs, src_langs) mt_tokens, tgt_langs = self.get_tokenized(mts, tgt_langs) pe_tokens, _ = self.get_tokenized(pes, tgt_langs) @@ -406,20 +409,21 @@ def generate_tags( class NameTBDGeneralTags(StrEnum): - OK = 'OK' + """Error types tags for NameTBD.""" + OK = 'OK' # 1:1 - the MT uses the same single word as the PE + BAD_SUBSTITUTION = 'BAD-SUB' # 1:1 - the MT uses a different single word than the PE - BAD_SUBSTITUTION = 'BAD-SUB' - BAD_DELETION_RIGHT = 'BAD-DEL-R' # smth deleted on the 
right side of this token - BAD_DELETION_LEFT = 'BAD-DEL-L' # smth deleted on the left side of this token - BAD_INSERTION = 'BAD-INS' # 1:n - BAD_SHIFTING = 'BAD-SHF' # change words order n:m with hight threshold + BAD_DELETION_RIGHT = 'BAD-DEL-R' # None:1 - the MT does not have a word existed in PE, deletion on the right + BAD_DELETION_LEFT = 'BAD-DEL-L' # None:1 - the MT does not have a word existed in PE, deletion on the left + BAD_INSERTION = 'BAD-INS' # 1:None - the MT wrongly inserted a words that is not in the PE - BAD_CONTRACTION = 'BAD-CON' # 1:n - BAD_EXPANSION = 'BAD-EXP' + BAD_SHIFTING = 'BAD-SHF' # for any number of tokens - detect crossing edges + BAD_CONTRACTION = 'BAD-CON' # 1:n - the MT uses a single word instead of multiple words in the PE + BAD_EXPANSION = 'BAD-EXP' # n:1 - the MT uses a multiple words instead of one in the PE -class NameTBDTagger(QETagger): +class NameTBDTagger(QETagger): ID = "tbd_qe" def __init__( @@ -466,7 +470,7 @@ def _group_by_node(alignments: List[Tuple[Optional[int], Optional[int]]], by_sta @staticmethod def _detect_crossing_edges(mt_tokens: List[str], pe_tokens: List[str], alignments: List[Tuple[Optional[int], Optional[int]]]) -> List[bool]: - """Detect crossing edges in the alignments. Return List of clusters of nodes that are connected.""" + """Detect crossing edges in the alignments. Return mask list of nodes that cross some other node.""" # TODO: optimize from n^2 to n as 2 pointers shifted_mt_mask = [False] * len(mt_tokens) @@ -509,41 +513,49 @@ def tags_from_edits( mt_tokens_embeddings: Optional[List[List[np.ndarray]]] = None, pe_tokens_embeddings: Optional[List[List[np.ndarray]]] = None, threshold: float = 0.5, - ) -> List[List[Set[str]]]: - """ Produce tags on MT tokens from edits found in the PE tokens. """ + ) -> List[List[TTag]]: + """ Produce tags on MT tokens from edits found in the PE tokens. + + Note: The tags indicate the type of error particular MT token is affected by. 
+ + The following situations are considered: + 1:1 match: OK if same, SUB if different + 1:n match: + - Obtain similarity between 1 and n (lexical, LaBSE if not found) + - If all matches are threshold, tag as CON (contraction) + - Else, tackle the highest match as 1:1 (OK/SUB) and the rest as None:1 (deletions) + n:1 match: + - Obtain similarity between n and 1 (lexical, LaBSE if not found) + - If all matches are threshold, tag as EXP (expansion) + - Else, tackle the highest match as 1:1 (OK/SUB) and the rest as 1:None (insertions) + n:m match: + - Prioritize n:1 matches with the EXP (expansion) tag + - Clear all None:1 cases + - Consider all n:m as 1:m cases, if current MT token is not tagged as EXP + shifting: + - First, clear all None:1 and 1:None cases - deleted and inserted words can't be shifted + - Then for all edges check if they cross with any other edge + - If they do, mark both nodes (2 edges starting node) in MT as SHF (shifted) + - TODO: + If in a block with multiple crossing alignments (with blocks named A, B, ...): + - Swapped pair A, B -> B, A: Both blocks receive SHF + - For n > 2, all blocks changing relative position receive SHF, others don't + """ # TODO: check. 
now - if embeddings are not provided, use Lev distance - # TODO: update docs with ERRORS approach rather than EDITS - # 1:1 match: OK if same, SUB if different - # 1:n match: - # - Find highest match for 1 in n (lexical, LaBSE if not found) - # - If all matches are < threshold, tag as EXP (expansion) - # - Else, assign OK if same, SUB if different - # - If match preceded by some of the n, assign also INS to match - # - If match followed by some of the n, push an INS tag to the next token - # n:1 match: - # - Find highest match for 1 in n (lexical, LaBSE if not found) - # - If all matches are < threshold, tag as CON (contraction) - # - Else, assign OK if same, SUB if different - # - All n different than match are assigned DEL - # n:m match: - # - For each 1 in n, find highest match for 1 in m (lexical, LaBSE if not found, from highest score to lowest) - # - If all matches are < threshold, skip and continue - # - Else assign OK if same, SUB if different, remove from available m matches - # If in a block with multiple crossing alignments (with blocks named A, B, ...): - # - Swapped pair A, B -> B, A: Both blocks receive SHF - # - For n > 2, all blocks changing relative position receive SHF, others don't - mt_tags = [] + mt_tags: List[List[Set[str]]] = [] + for mt_tok, pe_tok, mt_pe_align in tqdm(zip(mt_tokens, pe_tokens, mt_pe_alignments), desc="Tagging MT", total=len(mt_tokens)): mt_sent_tags: List[Set[str]] = [set() for _ in range(len(mt_tok))] # clear 1-n and n-1 nodes with low threshold - # e.g. if 1-n or n-1 have same token or high similarity, remove low similarity as deletions/insertions + # e.g. 
if 1-n or n-1 have same token or high similarity, remove low similarity as deletions/insertions (None:1 and 1:None) aligns_remove_1_to_n, aligns_remove_n_to_1 = set(), set() # 1-n match for mt_node_id, connected_pe_nodes_ids in NameTBDTagger._group_by_node(mt_pe_align, by_start_node=True, sort=False): if mt_node_id is not None and len(connected_pe_nodes_ids) > 1: + # TODO: check alignments lib to have sim pe_similarity = [ (pe_node_id, NameTBDTagger._lev_similarity(mt_tok[mt_node_id], pe_tok[pe_node_id])) for pe_node_id in connected_pe_nodes_ids @@ -581,7 +593,6 @@ def tags_from_edits( mt_pe_align = [(align[0], None) if align in aligns_remove_n_to_1 else align for align in mt_pe_align] # Solve all n-1: setup expansions tags and solve n-1 matches < threshold as smth+insertion - # TODO: check with threshold, now doing without threshold for pe_node_id, connected_mt_nodes_ids in NameTBDTagger._group_by_node(mt_pe_align, by_start_node=False, sort=True): if pe_node_id is not None and len(connected_mt_nodes_ids) > 1: # expansion, mark related mt nodes @@ -589,7 +600,7 @@ def tags_from_edits( if mt_node_id is not None: mt_sent_tags[mt_node_id].add(NameTBDGeneralTags.BAD_EXPANSION.value) - # Solve al deletions, add deletion tags on left and right sides + # Solve all deletions, add deletion tags on left and right sides mt_position = 0 for mt_node_id, connected_pe_nodes_ids in NameTBDTagger._group_by_node(mt_pe_align, by_start_node=True, sort=False): if mt_node_id is None: @@ -608,7 +619,7 @@ def tags_from_edits( print(mt_node_id, ' -> ', connected_pe_nodes_ids, '\t\tmt_position=', mt_position) assert mt_node_id is not None, "Already should be filtered all (None, smth) cases" if NameTBDGeneralTags.BAD_EXPANSION.value in mt_sent_tags[mt_node_id]: - continue # TODO: check with gabrielle the priority for EXPANSION and CONTRACTION + continue if len(connected_pe_nodes_ids) > 1: # contraction, mark the node mt_sent_tags[mt_node_id].add(NameTBDGeneralTags.BAD_CONTRACTION.value) @@ 
-642,18 +653,23 @@ def tags_to_source( mt_tokens: List[List[str]], src_mt_alignments: List[List[Tuple[int, int]]], mt_tags: List[List[Set[str]]], - ) -> List[List[str]]: - """ Propagate tags from MT to source. """ - # 1:1 match: copy tags from MT - # 1:n match: - # - Find highest match for 1 in n (lexical, LaBSE if not found) - # - If all matches are < threshold, TBD - # - Else, copy tags from top match in MT and ignore other matches - # n:1 match: copy tags from 1 to all n - # n:m match: - # - For each 1 in n, find highest match for 1 in m (lexical, LaBSE if not found) - # - If all matches are < threshold, ignore and continue - # - Copy tags from top match in MT and ignore other matches + ) -> List[List[TTag]]: + """ Propagate tags from MT to source. + + # TODO: update docstring with the final logic + The following cases are considered: + 1:1 match: copy tags from MT + 1:n match: + - Find highest match for 1 in n (lexical, LaBSE if not found) + - If all matches are < threshold, TBD + - Else, copy tags from top match in MT and ignore other matches + n:1 match: copy tags from 1 to all n + n:m match: + - For each 1 in n, find highest match for 1 in m (lexical, LaBSE if not found) + - If all matches are < threshold, ignore and continue + - Copy tags from top match in MT and ignore other matches + """ + raise NotImplementedError() def generate_tags( @@ -663,7 +679,7 @@ def generate_tags( pes: List[str], src_langs: Union[str, List[Set[str]]], tgt_langs: Union[str, List[Set[str]]], - ) -> Tuple[List[str], List[str]]: + ) -> Tuple[List[TTag], List[TTag]]: src_tokens, src_langs = self.get_tokenized(srcs, src_langs) mt_tokens, tgt_langs = self.get_tokenized(mts, tgt_langs) pe_tokens, _ = self.get_tokenized(pes, tgt_langs) From 9732403a278b37e2a8a5d0e260bdbbf4cc577de1 Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Thu, 4 May 2023 14:06:15 +0200 Subject: [PATCH 04/23] feat: tags to source and attention sim scores --- divemt/custom_simalign.py | 282 
+++++++++++++++++++++++ divemt/qe_taggers.py | 150 ++++++------ divemt/wmt22qe_utils.py | 6 +- tests/test_qe_taggers_name_tbd_tagger.py | 162 ++++++++----- 4 files changed, 481 insertions(+), 119 deletions(-) create mode 100644 divemt/custom_simalign.py diff --git a/divemt/custom_simalign.py b/divemt/custom_simalign.py new file mode 100644 index 0000000..77a9559 --- /dev/null +++ b/divemt/custom_simalign.py @@ -0,0 +1,282 @@ +""" +Copy of the https://github.com/cisnlp/simalign version 0.3 +Custom changes: +- added embedding output +- black and ruff are applied +- deleted some unused imports +- fix typings +- logger deleted +""" +from typing import Dict, List, Tuple, Union, Optional + +import numpy as np +from scipy.sparse import csr_matrix +from sklearn.metrics.pairwise import cosine_similarity + +try: + import networkx as nx + from networkx.algorithms.bipartite.matrix import from_biadjacency_matrix +except ImportError: + nx = None +import torch +from transformers import ( + BertModel, + BertTokenizer, + XLMModel, + XLMTokenizer, + RobertaModel, + RobertaTokenizer, + XLMRobertaModel, + XLMRobertaTokenizer, + AutoConfig, + AutoModel, + AutoTokenizer, +) + + +class EmbeddingLoader(object): + def __init__(self, model: str = "bert-base-multilingual-cased", device=torch.device("cpu"), layer: int = 8): + TR_Models = { + "bert-base-uncased": (BertModel, BertTokenizer), + "bert-base-multilingual-cased": (BertModel, BertTokenizer), + "bert-base-multilingual-uncased": (BertModel, BertTokenizer), + "xlm-mlm-100-1280": (XLMModel, XLMTokenizer), + "roberta-base": (RobertaModel, RobertaTokenizer), + "xlm-roberta-base": (XLMRobertaModel, XLMRobertaTokenizer), + "xlm-roberta-large": (XLMRobertaModel, XLMRobertaTokenizer), + } + + self.model = model + self.device = device + self.layer = layer + self.emb_model = None + self.tokenizer = None + + if model in TR_Models: + model_class, tokenizer_class = TR_Models[model] + self.emb_model = model_class.from_pretrained(model, 
output_hidden_states=True) + self.emb_model.eval() + self.emb_model.to(self.device) + self.tokenizer = tokenizer_class.from_pretrained(model) + else: + # try to load model with auto-classes + config = AutoConfig.from_pretrained(model, output_hidden_states=True) + self.emb_model = AutoModel.from_pretrained(model, config=config) + self.emb_model.eval() + self.emb_model.to(self.device) + self.tokenizer = AutoTokenizer.from_pretrained(model) + + def get_embed_list(self, sent_batch: List[List[str]]) -> torch.Tensor: + if self.emb_model is not None: + with torch.no_grad(): + if not isinstance(sent_batch[0], str): + inputs = self.tokenizer( + sent_batch, is_split_into_words=True, padding=True, truncation=True, return_tensors="pt" + ) + else: + inputs = self.tokenizer( + sent_batch, is_split_into_words=False, padding=True, truncation=True, return_tensors="pt" + ) + hidden = self.emb_model(**inputs.to(self.device))["hidden_states"] + if self.layer >= len(hidden): + raise ValueError( + f"Specified to take embeddings from layer {self.layer}, but model has only" + f" {len(hidden)} layers." 
+ ) + outputs = hidden[self.layer] + return outputs[:, 1:-1, :] + else: + return None + + +class SentenceAligner(object): + def __init__( + self, + model: str = "bert", + token_type: str = "bpe", + distortion: float = 0.0, + matching_methods: str = "mai", + return_similarity: Optional[str] = None, # new: ["max", "avg"] type of average similarity for words from tokens + device: str = "cpu", + layer: int = 8, + ): + model_names = {"bert": "bert-base-multilingual-cased", "xlmr": "xlm-roberta-base"} + all_matching_methods = {"a": "inter", "m": "mwmf", "i": "itermax", "f": "fwd", "r": "rev"} + + self.model = model + if model in model_names: + self.model = model_names[model] + self.token_type = token_type + self.distortion = distortion + self.matching_methods = [all_matching_methods[m] for m in matching_methods] + self.return_similarity = return_similarity + self.device = torch.device(device) + + self.embed_loader = EmbeddingLoader(model=self.model, device=self.device, layer=layer) + + @staticmethod + def get_max_weight_match(sim: np.ndarray) -> np.ndarray: + if nx is None: + raise ValueError("networkx must be installed to use match algorithm.") + + def permute(edge): + if edge[0] < sim.shape[0]: + return edge[0], edge[1] - sim.shape[0] + else: + return edge[1], edge[0] - sim.shape[0] + + G = from_biadjacency_matrix(csr_matrix(sim)) + matching = nx.max_weight_matching(G, maxcardinality=True) + matching = [permute(x) for x in matching] + matching = sorted(matching, key=lambda x: x[0]) + res_matrix = np.zeros_like(sim) + for edge in matching: + res_matrix[edge[0], edge[1]] = 1 + return res_matrix + + @staticmethod + def get_similarity(X: np.ndarray, Y: np.ndarray) -> np.ndarray: + return (cosine_similarity(X, Y) + 1.0) / 2.0 + + @staticmethod + def average_embeds_over_words(bpe_vectors: np.ndarray, word_tokens_pair: List[List[str]]) -> List[np.array]: + w2b_map = [] + cnt = 0 + w2b_map.append([]) + for wlist in word_tokens_pair[0]: + w2b_map[0].append([]) + for x in wlist: 
+ w2b_map[0][-1].append(cnt) + cnt += 1 + cnt = 0 + w2b_map.append([]) + for wlist in word_tokens_pair[1]: + w2b_map[1].append([]) + for x in wlist: + w2b_map[1][-1].append(cnt) + cnt += 1 + + new_vectors = [] + for l_id in range(2): + w_vector = [] + for word_set in w2b_map[l_id]: + w_vector.append(bpe_vectors[l_id][word_set].mean(0)) + new_vectors.append(np.array(w_vector)) + return new_vectors + + @staticmethod + def get_alignment_matrix(sim_matrix: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + m, n = sim_matrix.shape + forward = np.eye(n)[sim_matrix.argmax(axis=1)] # m x n + backward = np.eye(m)[sim_matrix.argmax(axis=0)] # n x m + return forward, backward.transpose() + + @staticmethod + def apply_distortion(sim_matrix: np.ndarray, ratio: float = 0.5) -> np.ndarray: + shape = sim_matrix.shape + if (shape[0] < 2 or shape[1] < 2) or ratio == 0.0: + return sim_matrix + + pos_x = np.array([[y / float(shape[1] - 1) for y in range(shape[1])] for x in range(shape[0])]) + pos_y = np.array([[x / float(shape[0] - 1) for x in range(shape[0])] for y in range(shape[1])]) + distortion_mask = 1.0 - ((pos_x - np.transpose(pos_y)) ** 2) * ratio + + return np.multiply(sim_matrix, distortion_mask) + + @staticmethod + def iter_max(sim_matrix: np.ndarray, max_count: int = 2) -> np.ndarray: + alpha_ratio = 0.9 + m, n = sim_matrix.shape + forward = np.eye(n)[sim_matrix.argmax(axis=1)] # m x n + backward = np.eye(m)[sim_matrix.argmax(axis=0)] # n x m + inter = forward * backward.transpose() + + if min(m, n) <= 2: + return inter + + new_inter = np.zeros((m, n)) + count = 1 + while count < max_count: + mask_x = 1.0 - np.tile(inter.sum(1)[:, np.newaxis], (1, n)).clip(0.0, 1.0) + mask_y = 1.0 - np.tile(inter.sum(0)[np.newaxis, :], (m, 1)).clip(0.0, 1.0) + mask = ((alpha_ratio * mask_x) + (alpha_ratio * mask_y)).clip(0.0, 1.0) + mask_zeros = 1.0 - ((1.0 - mask_x) * (1.0 - mask_y)) + if mask_x.sum() < 1.0 or mask_y.sum() < 1.0: + mask *= 0.0 + mask_zeros *= 0.0 + + new_sim = sim_matrix * 
mask + fwd = np.eye(n)[new_sim.argmax(axis=1)] * mask_zeros + bac = np.eye(m)[new_sim.argmax(axis=0)].transpose() * mask_zeros + new_inter = fwd * bac + + if np.array_equal(inter + new_inter, inter): + break + inter = inter + new_inter + count += 1 + return inter + + def get_word_aligns(self, src_sent: Union[str, List[str]], trg_sent: Union[str, List[str]]) -> Dict[str, List]: + if isinstance(src_sent, str): + src_sent = src_sent.split() + if isinstance(trg_sent, str): + trg_sent = trg_sent.split() + l1_tokens = [self.embed_loader.tokenizer.tokenize(word) for word in src_sent] + l2_tokens = [self.embed_loader.tokenizer.tokenize(word) for word in trg_sent] + bpe_lists = [[bpe for w in sent for bpe in w] for sent in [l1_tokens, l2_tokens]] + + if self.token_type == "bpe": + l1_b2w_map = [] + for i, wlist in enumerate(l1_tokens): + l1_b2w_map += [i for x in wlist] + l2_b2w_map = [] + for i, wlist in enumerate(l2_tokens): + l2_b2w_map += [i for x in wlist] + + vectors = self.embed_loader.get_embed_list([src_sent, trg_sent]).cpu().detach().numpy() + vectors = [vectors[i, : len(bpe_lists[i])] for i in [0, 1]] + + if self.token_type == "word": + vectors = self.average_embeds_over_words(vectors, [l1_tokens, l2_tokens]) + + all_mats = {} + sim = self.get_similarity(vectors[0], vectors[1]) + sim = self.apply_distortion(sim, self.distortion) + + all_mats["fwd"], all_mats["rev"] = self.get_alignment_matrix(sim) + all_mats["inter"] = all_mats["fwd"] * all_mats["rev"] + if "mwmf" in self.matching_methods: + all_mats["mwmf"] = self.get_max_weight_match(sim) + if "itermax" in self.matching_methods: + all_mats["itermax"] = self.iter_max(sim) + + # new: get word-level similarity matrix + if self.return_similarity and self.token_type == "bpe": + words_similarity = np.zeros((len(l1_tokens), len(l2_tokens)), dtype=np.float32) + for i in l1_b2w_map: + for j in l2_b2w_map: + if self.return_similarity == "max": + words_similarity[i, j] = max(words_similarity[i, j], sim[i, j]) + elif 
self.return_similarity == "avg": + words_similarity[i, j] += sim[i, j] / len(l1_tokens[i]) / len(l2_tokens[j]) + else: + raise ValueError(f"return_similarity={self.return_similarity} is not implemented.") + + aligns = {x: set() for x in self.matching_methods} + for i in range(len(vectors[0])): + for j in range(len(vectors[1])): + for ext in self.matching_methods: + if all_mats[ext][i, j] > 0: + if self.token_type == "bpe": + if self.return_similarity: + aligns[ext].add((l1_b2w_map[i], l2_b2w_map[j], words_similarity[l1_b2w_map[i], l2_b2w_map[j]])) + else: + aligns[ext].add((l1_b2w_map[i], l2_b2w_map[j])) + else: + if self.return_similarity: + aligns[ext].add((i, j, sim[i, j])) + else: + aligns[ext].add((i, j)) + for ext in aligns: + aligns[ext] = sorted(aligns[ext]) + return aligns diff --git a/divemt/qe_taggers.py b/divemt/qe_taggers.py index 4ea2c94..094439b 100644 --- a/divemt/qe_taggers.py +++ b/divemt/qe_taggers.py @@ -1,6 +1,7 @@ import codecs import logging import subprocess +import sys from abc import ABC, abstractmethod from collections import defaultdict from itertools import groupby @@ -8,12 +9,14 @@ from typing import List, Optional, Tuple, Union, Set, Generator, Any from xml.sax.saxutils import escape -import numpy as np from simalign import SentenceAligner -from strenum import StrEnum +if sys.version_info < (3, 11): + from strenum import StrEnum +else: + from enum import StrEnum from tqdm import tqdm -import Levenshtein as lev +from .custom_simalign import SentenceAligner as CustomSentenceAligner from .parse_utils import clear_nlp_cache, tokenize from .wmt22qe_utils import align_sentence_tercom, parse_tercom_xml_file @@ -21,6 +24,7 @@ TTag = Union[str, Set[str]] +TAlignment = Union[Tuple[Optional[int], Optional[int]], Tuple[Optional[int], Optional[int], Optional[float]]] class QETagger(ABC): @@ -33,7 +37,7 @@ def align_source_mt( src_tokens: List[List[str]], mt_tokens: List[List[str]], **align_source_mt_kwargs: Any, - ) -> List[List[Tuple[int, int]]]: 
+ ) -> List[List[TAlignment]]: """Align source and machine translation tokens.""" raise NotImplementedError(f"{self.__class__.__name__} does not implement align_source_mt()") @@ -42,7 +46,7 @@ def align_source_pe( src_tokens: List[List[str]], pe_tokens: List[List[str]], **align_source_pe_kwargs: Any, - ) -> List[List[Tuple[int, int]]]: + ) -> List[List[TAlignment]]: """Align source and post-edited tokens.""" raise NotImplementedError(f"{self.__class__.__name__} does not implement align_source_pe()") @@ -52,7 +56,7 @@ def align_mt_pe( mt_tokens: List[List[str]], pe_tokens: List[List[str]], **align_mt_pe_kwargs: Any, - ) -> List[List[Tuple[int, int]]]: + ) -> List[List[TAlignment]]: """Align machine translation and post-editing tokens.""" pass @@ -61,7 +65,7 @@ def align_mt_pe( def tags_from_edits( mt_tokens: List[List[str]], pe_tokens: List[List[str]], - alignments: List[List[Tuple[int, int]]], + alignments: List[List[TAlignment]], **mt_tagging_kwargs: Any, ) -> List[List[TTag]]: """Produce tags on MT tokens from edits found in the PE tokens.""" @@ -167,7 +171,7 @@ def align_source_pe( src_tokens: List[List[str]], pe_tokens: List[List[str]], pe_langs: List[str], - ) -> List[List[Tuple[int, int]]]: + ) -> List[List[TAlignment]]: return [ self.aligner.get_word_aligns(src_tok, mt_tok)["itermax" if mt_lang not in ["de", "cs"] else "inter"] for src_tok, mt_tok, mt_lang in tqdm( @@ -181,7 +185,7 @@ def align_mt_pe( self, mt_tokens: List[List[str]], pe_tokens: List[List[str]], - ) -> List[List[Tuple[int, int]]]: + ) -> List[List[TAlignment]]: ref_fname = self.tmp_dir / "ref.txt" hyp_fname = self.tmp_dir / "hyp.txt" # Adapted from https://github.com/deep-spin/qe-corpus-builder/corpus_generation/tools/format_tercom.py @@ -230,7 +234,7 @@ def align_mt_pe( def tags_from_edits( mt_tokens: List[List[str]], pe_tokens: List[List[str]], - alignments: List[List[Tuple[int, int]]], + alignments: List[List[TAlignment]], use_gaps: bool = False, omissions: str = OmissionRule.RIGHT.value, 
) -> List[List[TTag]]: @@ -324,8 +328,8 @@ def tags_to_source( src_tokens: List[List[str]], pe_tokens: List[List[str]], mt_tokens: List[List[str]], - src_pe_alignments: List[List[Tuple[int, int]]], - mt_pe_alignments: List[List[Tuple[int, int]]], + src_pe_alignments: List[List[TAlignment]], + mt_pe_alignments: List[List[TAlignment]], fluency_rule: str = FluencyRule.NORMAL.value, ) -> List[List[TTag]]: """Propagate tags from MT to source.""" @@ -428,9 +432,9 @@ class NameTBDTagger(QETagger): def __init__( self, - aligner: Optional[SentenceAligner] = None, + aligner: Optional[CustomSentenceAligner] = None, ): - self.aligner = aligner if aligner else SentenceAligner(model="bert", token_type="bpe", matching_methods="mai") + self.aligner = aligner if aligner else CustomSentenceAligner(model="bert", token_type="bpe", matching_methods="mai", return_similarity="avg") def align_source_mt( self, @@ -438,7 +442,7 @@ def align_source_mt( mt_tokens: List[List[str]], src_langs: List[str], mt_langs: List[str], - ) -> List[List[Tuple[int, int]]]: + ) -> List[List[TAlignment]]: return [ self.aligner.get_word_aligns(src_tok, mt_tok)["inter"] for src_tok, mt_tok in tqdm( @@ -451,7 +455,7 @@ def align_mt_pe( mt_tokens: List[List[str]], pe_tokens: List[List[str]], langs: List[str], - ) -> List[List[Tuple[int, int]]]: + ) -> List[List[TAlignment]]: return [ self.aligner.get_word_aligns(mt_tok, pe_tok)["inter"] for mt_tok, pe_tok in tqdm( @@ -460,16 +464,17 @@ def align_mt_pe( ] @staticmethod - def _group_by_node(alignments: List[Tuple[Optional[int], Optional[int]]], by_start_node: bool = True, sort: bool = False) -> Generator[Tuple[int, List[int]], None, None]: + def _group_by_node(alignments: List[Tuple[Optional[int], Optional[int]]], by_start_node: bool = True, sort: bool = False) -> Generator[Tuple[int, List[int], List[float]], None, None]: """Yield a node id and a list of connected nodes.""" _by_index = 0 if by_start_node else 1 if sort: alignments = sorted(alignments, key=lambda x: 
x[_by_index] if x[_by_index] is not None else -1) for start_node, connected_alignments in groupby(alignments, lambda x: x[_by_index]): - yield start_node, [end_id if by_start_node else start_id for start_id, end_id in connected_alignments] + connected_alignments = list(connected_alignments) + yield start_node, [end_id if by_start_node else start_id for start_id, end_id, _ in connected_alignments], [similarity for _, _, similarity in connected_alignments] @staticmethod - def _detect_crossing_edges(mt_tokens: List[str], pe_tokens: List[str], alignments: List[Tuple[Optional[int], Optional[int]]]) -> List[bool]: + def _detect_crossing_edges(mt_tokens: List[str], pe_tokens: List[str], alignments: List[Tuple[Optional[int], Optional[int], float]]) -> List[bool]: """Detect crossing edges in the alignments. Return mask list of nodes that cross some other node.""" # TODO: optimize from n^2 to n as 2 pointers shifted_mt_mask = [False] * len(mt_tokens) @@ -496,23 +501,12 @@ def _detect_crossing_edges(mt_tokens: List[str], pe_tokens: List[str], alignment return shifted_mt_mask - @staticmethod - def _lev_similarity(mt_tok: str, pe_tok: str) -> float: - """Calculate Lev similarity between two tokens in [0, 1] range.""" - if mt_tok == pe_tok: - return 1.0 - - # calculate similarity using Lev distance - return lev.ratio(mt_tok, pe_tok) - @staticmethod def tags_from_edits( mt_tokens: List[List[str]], pe_tokens: List[List[str]], - mt_pe_alignments: List[List[Tuple[int, int]]], - mt_tokens_embeddings: Optional[List[List[np.ndarray]]] = None, - pe_tokens_embeddings: Optional[List[List[np.ndarray]]] = None, - threshold: float = 0.5, + mt_pe_alignments: List[List[TAlignment]], + threshold: float = 0.8, ) -> List[List[TTag]]: """ Produce tags on MT tokens from edits found in the PE tokens. 
@@ -545,55 +539,44 @@ def tags_from_edits( mt_tags: List[List[Set[str]]] = [] - for mt_tok, pe_tok, mt_pe_align in tqdm(zip(mt_tokens, pe_tokens, mt_pe_alignments), desc="Tagging MT", total=len(mt_tokens)): + for mt_sent_tok, pe_sent_tok, mt_pe_sent_align in tqdm(zip(mt_tokens, pe_tokens, mt_pe_alignments), desc="Tagging MT", total=len(mt_tokens)): - mt_sent_tags: List[Set[str]] = [set() for _ in range(len(mt_tok))] + mt_sent_tags: List[Set[str]] = [set() for _ in range(len(mt_sent_tok))] # clear 1-n and n-1 nodes with low threshold # e.g. if 1-n or n-1 have same token or high similarity, remove low similarity as deletions/insertions (None:1 and 1:None) aligns_remove_1_to_n, aligns_remove_n_to_1 = set(), set() # 1-n match - for mt_node_id, connected_pe_nodes_ids in NameTBDTagger._group_by_node(mt_pe_align, by_start_node=True, sort=False): + for mt_node_id, connected_pe_nodes_ids, connected_pe_similarity in NameTBDTagger._group_by_node(mt_pe_sent_align, by_start_node=True, sort=True): if mt_node_id is not None and len(connected_pe_nodes_ids) > 1: - # TODO: check alignments lib to have sim - pe_similarity = [ - (pe_node_id, NameTBDTagger._lev_similarity(mt_tok[mt_node_id], pe_tok[pe_node_id])) - for pe_node_id in connected_pe_nodes_ids - if pe_node_id is not None - ] - if all(sim < threshold for _, sim in pe_similarity): + if all(sim < threshold for sim in connected_pe_similarity): continue - if all(sim > threshold for _, sim in pe_similarity): + if all(sim > threshold for sim in connected_pe_similarity): continue aligns_remove_1_to_n.update([ - (mt_node_id, pe_node_id) - for pe_node_id, sim in pe_similarity + (mt_node_id, pe_node_id, sim) + for pe_node_id, sim in zip(connected_pe_nodes_ids, connected_pe_similarity) if sim < threshold ]) # remove selected aligns and add None connected nodes instead - mt_pe_align = [(None, align[1]) if align in aligns_remove_1_to_n else align for align in mt_pe_align] + mt_pe_sent_align = [(None, align[1], None) if align in 
aligns_remove_1_to_n else align for align in mt_pe_sent_align] # n-1 match - for pe_node_id, connected_mt_nodes_ids in NameTBDTagger._group_by_node(mt_pe_align, by_start_node=False, sort=True): + for pe_node_id, connected_mt_nodes_ids, connected_mt_similarity in NameTBDTagger._group_by_node(mt_pe_sent_align, by_start_node=False, sort=True): if pe_node_id is not None and len(connected_mt_nodes_ids) > 1: - mt_similarity = [ - (mt_node_id, NameTBDTagger._lev_similarity(mt_tok[mt_node_id], pe_tok[pe_node_id])) - for mt_node_id in connected_mt_nodes_ids - if mt_node_id is not None - ] - if all(sim < threshold for _, sim in mt_similarity): + if all(sim < threshold for sim in connected_mt_similarity): continue - if all(sim > threshold for _, sim in mt_similarity): + if all(sim > threshold for sim in connected_mt_similarity): continue aligns_remove_n_to_1.update([ - (mt_node_id, pe_node_id) - for mt_node_id, sim in mt_similarity + (mt_node_id, pe_node_id, sim) + for mt_node_id, sim in zip(connected_mt_nodes_ids, connected_mt_similarity) if sim < threshold ]) # remove selected aligns and add None connected nodes instead - mt_pe_align = [(align[0], None) if align in aligns_remove_n_to_1 else align for align in mt_pe_align] + mt_pe_sent_align = [(align[0], None, None) if align in aligns_remove_n_to_1 else align for align in mt_pe_sent_align] # Solve all n-1: setup expansions tags and solve n-1 matches < threshold as smth+insertion - for pe_node_id, connected_mt_nodes_ids in NameTBDTagger._group_by_node(mt_pe_align, by_start_node=False, sort=True): + for pe_node_id, connected_mt_nodes_ids, _ in NameTBDTagger._group_by_node(mt_pe_sent_align, by_start_node=False, sort=True): if pe_node_id is not None and len(connected_mt_nodes_ids) > 1: # expansion, mark related mt nodes for mt_node_id in connected_mt_nodes_ids: @@ -602,7 +585,7 @@ def tags_from_edits( # Solve all deletions, add deletion tags on left and right sides mt_position = 0 - for mt_node_id, connected_pe_nodes_ids in 
NameTBDTagger._group_by_node(mt_pe_align, by_start_node=True, sort=False): + for mt_node_id, connected_pe_nodes_ids, _ in NameTBDTagger._group_by_node(mt_pe_sent_align, by_start_node=True, sort=False): if mt_node_id is None: # deleted word error, mark left and right modes if 0 <= mt_position - 1 < len(mt_sent_tags): @@ -612,11 +595,10 @@ def tags_from_edits( else: mt_position += 1 # clear all (None, i) to not mess grouping - mt_pe_align = [align for align in mt_pe_align if align[0] is not None] + mt_pe_sent_align = [align for align in mt_pe_sent_align if align[0] is not None] # Solve all 1-n matches - for mt_node_id, connected_pe_nodes_ids in NameTBDTagger._group_by_node(mt_pe_align, by_start_node=True, sort=True): - print(mt_node_id, ' -> ', connected_pe_nodes_ids, '\t\tmt_position=', mt_position) + for mt_node_id, connected_pe_nodes_ids, _ in NameTBDTagger._group_by_node(mt_pe_sent_align, by_start_node=True, sort=True): assert mt_node_id is not None, "Already should be filtered all (None, smth) cases" if NameTBDGeneralTags.BAD_EXPANSION.value in mt_sent_tags[mt_node_id]: continue @@ -626,7 +608,7 @@ def tags_from_edits( elif connected_pe_nodes_ids[0] is None: # insertion, mark the node mt_sent_tags[mt_node_id].add(NameTBDGeneralTags.BAD_INSERTION.value) - elif mt_tok[mt_node_id] != pe_tok[connected_pe_nodes_ids[0]]: + elif mt_sent_tok[mt_node_id] != pe_sent_tok[connected_pe_nodes_ids[0]]: # substitution, mark the node mt_sent_tags[mt_node_id].add(NameTBDGeneralTags.BAD_SUBSTITUTION.value) else: @@ -634,7 +616,7 @@ def tags_from_edits( mt_sent_tags[mt_node_id].add(NameTBDGeneralTags.OK.value) # Add shifted tags if so - for mt_node_id, mask in enumerate(NameTBDTagger._detect_crossing_edges(mt_tok, pe_tok, mt_pe_align)): + for mt_node_id, mask in enumerate(NameTBDTagger._detect_crossing_edges(mt_sent_tok, pe_sent_tok, mt_pe_sent_align)): if mask: mt_sent_tags[mt_node_id].add(NameTBDGeneralTags.BAD_SHIFTING.value) @@ -651,7 +633,7 @@ def tags_from_edits( def 
tags_to_source( src_tokens: List[List[str]], mt_tokens: List[List[str]], - src_mt_alignments: List[List[Tuple[int, int]]], + src_mt_alignments: List[List[TAlignment]], mt_tags: List[List[Set[str]]], ) -> List[List[TTag]]: """ Propagate tags from MT to source. @@ -670,7 +652,43 @@ def tags_to_source( - Copy tags from top match in MT and ignore other matches """ - raise NotImplementedError() + src_tags: List[List[Set[str]]] = [] + + for src_sent_tok, mt_sent_tok, mt_sent_tags, mt_pe_sent_align in tqdm(zip(src_tokens, mt_tokens, mt_tags, src_mt_alignments), desc="Transfer to source", total=len(src_tokens)): + + src_sent_tags: List[Set[str]] = [set() for _ in range(len(src_sent_tok))] + + # Solve all as 1-n matches + for src_node_id, connected_mt_nodes_ids, connected_mt_similarity in NameTBDTagger._group_by_node(mt_pe_sent_align, by_start_node=True, sort=True): + if src_node_id is None: + continue + elif len(connected_mt_nodes_ids) == 0: + continue + elif len(connected_mt_nodes_ids) > 1: + # n-1 match, find best match + best_mt_node_id, best_mt_similarity = None, 0.0 + for mt_node_id, mt_similarity in zip(connected_mt_nodes_ids, connected_mt_similarity): + if mt_similarity is not None and mt_similarity > best_mt_similarity: + best_mt_node_id, best_mt_similarity = mt_node_id, mt_similarity + if best_mt_node_id is None: + # no good match, ignore + continue + else: + # copy tags from best match + src_sent_tags[src_node_id].update(mt_sent_tags[best_mt_node_id]) + elif connected_mt_nodes_ids[0] is None: + # nothing to copy from MT + continue + else: + # 1-1 match, copy tags + src_sent_tags[src_node_id].update(mt_sent_tags[connected_mt_nodes_ids[0]]) + + # Save tags for this sentence + src_tags.append(src_sent_tags) + + # Basic sanity checks + assert all(len(aa) == len(bb) for aa, bb in zip(src_tokens, src_tags)), "Source tags creation failed, number of tokens and tags do not match" + return src_tags def generate_tags( self, diff --git a/divemt/wmt22qe_utils.py 
b/divemt/wmt22qe_utils.py index 6ff2731..7322bb5 100644 --- a/divemt/wmt22qe_utils.py +++ b/divemt/wmt22qe_utils.py @@ -1,8 +1,12 @@ import re +import sys from typing import List, Tuple from xml.dom.minidom import parse -from strenum import StrEnum +if sys.version_info < (3, 11): + from strenum import StrEnum +else: + from enum import StrEnum class TercomEdit(StrEnum): diff --git a/tests/test_qe_taggers_name_tbd_tagger.py b/tests/test_qe_taggers_name_tbd_tagger.py index ea13df6..a087eba 100644 --- a/tests/test_qe_taggers_name_tbd_tagger.py +++ b/tests/test_qe_taggers_name_tbd_tagger.py @@ -1,37 +1,42 @@ +import sys from typing import List, Tuple, Set import pytest -from strenum import StrEnum +if sys.version_info < (3, 11): + from strenum import StrEnum +else: + from enum import StrEnum from divemt.qe_taggers import NameTBDTagger from divemt.qe_taggers import NameTBDGeneralTags as Tags +tagger = NameTBDTagger() + + class TestUtils: @pytest.mark.parametrize("mt_len, mt_pe_alignments, true_mt_shifts_mask", [ - (1, [(0, 0)], [False]), - (2, [(0, 0), (1, 1)], [False, False]), - (3, [(0, 0), (1, 1), (2, 2)], [False, False, False]), - (3, [(0, 0), (1, None), (2, 1)], [False, False, False]), + (1, [(0, 0, 0.9)], [False]), + (2, [(0, 0, 0.9), (1, 1, 0.9)], [False, False]), + (3, [(0, 0, 0.9), (1, 1, 0.9), (2, 2, 0.9)], [False, False, False]), + (3, [(0, 0, 0.9), (1, None, None), (2, 1, 0.9)], [False, False, False]), # easiest case - (2, [(0, 1), (1, 0)], [True, True]), + (2, [(0, 1, 0.9), (1, 0, 0.9)], [True, True]), # central one is not moved, but have crossing edges - (3, [(0, 2), (1, 1), (2, 0)], [True, True, True]), + (3, [(0, 2, 0.9), (1, 1, 0.9), (2, 0, 0.9)], [True, True, True]), # the central one deleted, so not shifted, no crossing edges - (3, [(0, 1), (1, None), (2, 0)], [True, False, True]), - # TODO: check with gabrielle - (4, [(0, 0), (1, 3), (1, 4), (1, 5), (2, 2), (2, 0), (3, None)], [False, True, True, False]), + (3, [(0, 1, 0.9), (1, None, None), (2, 0, 
0.9)], [True, False, True]), + (4, [(0, 0, 0.9), (1, 3, 0.9), (1, 4, 0.9), (1, 5, 0.9), (2, 2, 0.9), (2, 0, 0.9), (3, None, None)], [False, True, True, False]), ]) def test_detect_crossing_edges(self, mt_len: int, mt_pe_alignments: List[Tuple[int, int]], true_mt_shifts_mask: List[bool]) -> None: - tagger = NameTBDTagger() mt_shifts_mask = tagger._detect_crossing_edges([str(i) for i in range(mt_len)], [str(i) for i in range(mt_len)], mt_pe_alignments) assert mt_shifts_mask == true_mt_shifts_mask class TestTagsFromEdits: @pytest.mark.parametrize("mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ - (["A", "B"], ["A", "B"], [(0, 0), (1, 1)], [{Tags.OK}, {Tags.OK}]), - (["A", "B", "C", "D"], ["A", "B", "C", "D"], [(0, 0), (1, 1), (2, 2), (3, 3)], [{Tags.OK}, {Tags.OK}, {Tags.OK}, {Tags.OK}]), + (["A", "B"], ["A", "B"], [(0, 0, 0.9), (1, 1, 0.9)], [{Tags.OK}, {Tags.OK}]), + (["A", "B", "C", "D"], ["A", "B", "C", "D"], [(0, 0, 0.9), (1, 1, 0.9), (2, 2, 0.9), (3, 3, 0.9)], [{Tags.OK}, {Tags.OK}, {Tags.OK}, {Tags.OK}]), ([], [], [], []), ]) def test_single_error_ok( @@ -41,15 +46,14 @@ def test_single_error_ok( mt_pe_alignments: List[Tuple[int, int]], true_mt_tags: List[Set[StrEnum]], ) -> None: - tagger = NameTBDTagger() predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] assert len(predicted_tags) == len(true_mt_tags) for predicted_tags, true_tags in zip(predicted_tags, true_mt_tags): assert predicted_tags == {t.value for t in true_tags} @pytest.mark.parametrize("mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ - (["A", "B", "C"], ["A", "X", "Z"], [(0, 0), (1, 1), (2, 2)], [{Tags.OK}, {Tags.BAD_SUBSTITUTION}, {Tags.BAD_SUBSTITUTION}]), - (["A", "B"], ["Z", "X"], [(0, 0), (1, 1)], [{Tags.BAD_SUBSTITUTION}, {Tags.BAD_SUBSTITUTION}]), + (["A", "B", "C"], ["A", "X", "Z"], [(0, 0, 0.9), (1, 1, 0.9), (2, 2, 0.9)], [{Tags.OK}, {Tags.BAD_SUBSTITUTION}, {Tags.BAD_SUBSTITUTION}]), + (["A", "B"], ["Z", "X"], [(0, 0, 0.9), (1, 1, 
0.9)], [{Tags.BAD_SUBSTITUTION}, {Tags.BAD_SUBSTITUTION}]), # For 1-n and n-1 cases see contraction and expansion tests ]) def test_single_error_substitution( @@ -59,16 +63,15 @@ def test_single_error_substitution( mt_pe_alignments: List[Tuple[int, int]], true_mt_tags: List[Set[StrEnum]], ) -> None: - tagger = NameTBDTagger() predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] assert len(predicted_tags) == len(true_mt_tags) for predicted_tags, true_tags in zip(predicted_tags, true_mt_tags): assert predicted_tags == {t.value for t in true_tags} @pytest.mark.parametrize("mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ - (["A", "B"], ["A"], [(0, 0), (1, None)], [{Tags.OK}, {Tags.BAD_INSERTION}]), - (["A", "B"], ["B"], [(0, None), (1, 0)], [{Tags.BAD_INSERTION}, {Tags.OK}]), - (["A", "B"], [], [(0, None), (1, None)], [{Tags.BAD_INSERTION}, {Tags.BAD_INSERTION}]), + (["A", "B"], ["A"], [(0, 0, 0.9), (1, None, None)], [{Tags.OK}, {Tags.BAD_INSERTION}]), + (["A", "B"], ["B"], [(0, None, None), (1, 0, 0.9)], [{Tags.BAD_INSERTION}, {Tags.OK}]), + (["A", "B"], [], [(0, None, None), (1, None, None)], [{Tags.BAD_INSERTION}, {Tags.BAD_INSERTION}]), # For 1-n and n-1 cases see contraction and expansion tests ]) def test_single_error_insertion( @@ -78,24 +81,23 @@ def test_single_error_insertion( mt_pe_alignments: List[Tuple[int, int]], true_mt_tags: List[Set[StrEnum]], ) -> None: - tagger = NameTBDTagger() predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] assert len(predicted_tags) == len(true_mt_tags) for predicted_tags, true_tags in zip(predicted_tags, true_mt_tags): assert predicted_tags == {t.value for t in true_tags} @pytest.mark.parametrize("mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ - (["A"], ["A", "X"], [(0, 0), (None, 1)], [{Tags.OK, Tags.BAD_DELETION_RIGHT}]), - (["A"], ["X", "A"], [(None, 0), (0, 1)], [{Tags.OK, Tags.BAD_DELETION_LEFT}]), - (["A", "B"], ["A", "X", "B"], 
[(0, 0), (None, 1), (1, 2)], [{Tags.OK, Tags.BAD_DELETION_RIGHT}, {Tags.OK, Tags.BAD_DELETION_LEFT}]), + (["A"], ["A", "X"], [(0, 0, 0.9), (None, 1, None)], [{Tags.OK, Tags.BAD_DELETION_RIGHT}]), + (["A"], ["X", "A"], [(None, 0, None), (0, 1, 0.9)], [{Tags.OK, Tags.BAD_DELETION_LEFT}]), + (["A", "B"], ["A", "X", "B"], [(0, 0, 0.9), (None, 1, None), (1, 2, 0.9)], [{Tags.OK, Tags.BAD_DELETION_RIGHT}, {Tags.OK, Tags.BAD_DELETION_LEFT}]), # Delete multiple tokens, but tag error as deleted one - (["A"], ["A", "X", "Y", "Z"], [(0, 0), (None, 1), (None, 2), (None, 3)], [{Tags.OK, Tags.BAD_DELETION_RIGHT}]), - (["A"], ["X", "Y", "Z", "A"], [(None, 0), (None, 1), (None, 2), (0, 3)], [{Tags.OK, Tags.BAD_DELETION_LEFT}]), - (["A", "B"], ["A", "X", "Y", "Z", "B"], [(0, 0), (None, 1), (None, 2), (None, 3), (1, 4)], [{Tags.OK, Tags.BAD_DELETION_RIGHT}, {Tags.OK, Tags.BAD_DELETION_LEFT}]), + (["A"], ["A", "X", "Y", "Z"], [(0, 0, 0.9), (None, 1, None), (None, 2, None), (None, 3, None)], [{Tags.OK, Tags.BAD_DELETION_RIGHT}]), + (["A"], ["X", "Y", "Z", "A"], [(None, 0, None), (None, 1, None), (None, 2, None), (0, 3, 0.9)], [{Tags.OK, Tags.BAD_DELETION_LEFT}]), + (["A", "B"], ["A", "X", "Y", "Z", "B"], [(0, 0, 0.9), (None, 1, None), (None, 2, None), (None, 3, None), (1, 4, 0.9)], [{Tags.OK, Tags.BAD_DELETION_RIGHT}, {Tags.OK, Tags.BAD_DELETION_LEFT}]), # deleted both left and right sides - (["A"], ["X", "A", "Y"], [(None, 0), (0, 1), (None, 2)], [{Tags.OK, Tags.BAD_DELETION_LEFT, Tags.BAD_DELETION_RIGHT}]), + (["A"], ["X", "A", "Y"], [(None, 0, None), (0, 1, 0.9), (None, 2, None)], [{Tags.OK, Tags.BAD_DELETION_LEFT, Tags.BAD_DELETION_RIGHT}]), # deleted for empty target - ([], ["X"], [(None, 0)], []), + ([], ["X"], [(None, 0, None)], []), ]) def test_single_error_deletion( self, @@ -104,7 +106,6 @@ def test_single_error_deletion( mt_pe_alignments: List[Tuple[int, int]], true_mt_tags: List[Set[StrEnum]], ) -> None: - tagger = NameTBDTagger() predicted_tags = 
tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] assert len(predicted_tags) == len(true_mt_tags) for predicted_tags, true_tags in zip(predicted_tags, true_mt_tags): @@ -112,17 +113,17 @@ def test_single_error_deletion( @pytest.mark.parametrize("mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ # Have same BBB token, so should filter CCC and TTT out as Deletion error and BBB as Ok - (["AAA", "BBB"], ["AAA", "BBB", "CCC", "TTT"], [(0, 0), (1, 1), (1, 2), (1, 3)], [{Tags.OK}, {Tags.OK, Tags.BAD_DELETION_RIGHT}]), - (["AAA", "BBB"], ["AAA", "TTT", "BBB", "CCC"], [(0, 0), (1, 1), (1, 2), (1, 3)], [{Tags.OK, Tags.BAD_DELETION_RIGHT}, {Tags.OK, Tags.BAD_DELETION_RIGHT, Tags.BAD_DELETION_LEFT}]), + (["AAA", "BBB"], ["AAA", "BBB", "CCC", "TTT"], [(0, 0, 0.9), (1, 1, 0.9), (1, 2, 0.1), (1, 3, 0.1)], [{Tags.OK}, {Tags.OK, Tags.BAD_DELETION_RIGHT}]), + (["AAA", "BBB"], ["AAA", "TTT", "BBB", "CCC"], [(0, 0, 0.9), (1, 1, 0.1), (1, 2, 0.9), (1, 3, 0.1)], [{Tags.OK, Tags.BAD_DELETION_RIGHT}, {Tags.OK, Tags.BAD_DELETION_RIGHT, Tags.BAD_DELETION_LEFT}]), # XXX, TTT and CCC >threshold are same BBB token, so its bad Contradiction - (["AAA", "BBB"], ["AAA", "XXX", "CCC", "TTT"], [(0, 0), (1, 1), (1, 2), (1, 3)], [{Tags.OK}, {Tags.BAD_CONTRACTION}]), + (["AAA", "BBB"], ["AAA", "XXX", "CCC", "TTT"], [(0, 0, 0.9), (1, 1, 0.9), (1, 2, 0.9), (1, 3, 0.9)], [{Tags.OK}, {Tags.BAD_CONTRACTION}]), # BBX is >threshold, CCC/TTT threshold, so all are Contractions - (["AAA", "BBB"], ["AAA", "BBX", "XBB"], [(0, 0), (1, 1), (1, 2)], [{Tags.OK}, {Tags.BAD_CONTRACTION}]), + (["AAA", "BBB"], ["AAA", "BBX", "XBB"], [(0, 0, 0.9), (1, 1, 0.9), (1, 2, 0.9)], [{Tags.OK}, {Tags.BAD_CONTRACTION}]), # BBX and XBB >threshold while TTT is None: - tagger = NameTBDTagger() predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] assert len(predicted_tags) == len(true_mt_tags) for predicted_tags, true_tags in zip(predicted_tags, true_mt_tags): @@ -140,17 
+140,17 @@ def test_single_error_contraction( @pytest.mark.parametrize("mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ # BB token is same, so CCC and TTT are insertions - (["AAA", "BBB", "CCC", "TTT"], ["AAA", "BBB"], [(0, 0), (1, 1), (2, 1), (3, 1)], [{Tags.OK}, {Tags.OK}, {Tags.BAD_INSERTION}, {Tags.BAD_INSERTION}]), - (["AAA", "TTT", "BBB", "CCC"], ["AAA", "BBB"], [(0, 0), (1, 1), (2, 1), (3, 1)], [{Tags.OK}, {Tags.BAD_INSERTION}, {Tags.OK}, {Tags.BAD_INSERTION}]), + (["AAA", "BBB", "CCC", "TTT"], ["AAA", "BBB"], [(0, 0, 0.9), (1, 1, 0.9), (2, 1, 0.1), (3, 1, 0.1)], [{Tags.OK}, {Tags.OK}, {Tags.BAD_INSERTION}, {Tags.BAD_INSERTION}]), + (["AAA", "TTT", "BBB", "CCC"], ["AAA", "BBB"], [(0, 0, 0.9), (1, 1, 0.1), (2, 1, 0.9), (3, 1, 0.1)], [{Tags.OK}, {Tags.BAD_INSERTION}, {Tags.OK}, {Tags.BAD_INSERTION}]), # XXX, TTT and CCC >threshold are same BBB token, so its bad Expansion - (["AAA", "XXX", "CCC", "TTT"], ["AAA", "BBB"], [(0, 0), (1, 1), (2, 1), (3, 1)], [{Tags.OK}, {Tags.BAD_EXPANSION}, {Tags.BAD_EXPANSION}, {Tags.BAD_EXPANSION}]), + (["AAA", "XXX", "CCC", "TTT"], ["AAA", "BBB"], [(0, 0, 0.9), (1, 1, 0.9), (2, 1, 0.9), (3, 1, 0.9)], [{Tags.OK}, {Tags.BAD_EXPANSION}, {Tags.BAD_EXPANSION}, {Tags.BAD_EXPANSION}]), # BBX is >threshold, CCC/TTT threshold, so all are Expansion - (["AAA", "BBX", "XBB"], ["AAA", "BBB"], [(0, 0), (1, 1), (2, 1)], [{Tags.OK}, {Tags.BAD_EXPANSION}, {Tags.BAD_EXPANSION}]), + (["AAA", "BBX", "XBB"], ["AAA", "BBB"], [(0, 0, 0.9), (1, 1, 0.9), (2, 1, 0.9)], [{Tags.OK}, {Tags.BAD_EXPANSION}, {Tags.BAD_EXPANSION}]), # BBX and XBB >threshold while TTT is None: - tagger = NameTBDTagger() predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] assert len(predicted_tags) == len(true_mt_tags) for predicted_tags, true_tags in zip(predicted_tags, true_mt_tags): @@ -168,13 +167,13 @@ def test_single_error_expansion( @pytest.mark.parametrize("mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ # simple case 
- (["A", "B"], ["B", "A"], [(0, 1), (1, 0)], [{Tags.OK, Tags.BAD_SHIFTING}, {Tags.OK, Tags.BAD_SHIFTING}]), + (["A", "B"], ["B", "A"], [(0, 1, 0.9), (1, 0, 0.9)], [{Tags.OK, Tags.BAD_SHIFTING}, {Tags.OK, Tags.BAD_SHIFTING}]), # middle intact, but crossing edges, so shifted - (["A", "X", "Y", "B"], ["B", "X", "Y", "A"], [(0, 3), (1, 1), (2, 2), (3, 0)], [{Tags.OK, Tags.BAD_SHIFTING}, {Tags.OK, Tags.BAD_SHIFTING}, {Tags.OK, Tags.BAD_SHIFTING}, {Tags.OK, Tags.BAD_SHIFTING}]), - # node inserted, so should not be marked as shifted TODO: check with gabrielle - (["A", "X", "B"], ["B", "A"], [(0, 1), (1, None), (2, 0)], [{Tags.OK, Tags.BAD_SHIFTING}, {Tags.BAD_INSERTION}, {Tags.OK, Tags.BAD_SHIFTING}]), + (["A", "X", "Y", "B"], ["B", "X", "Y", "A"], [(0, 3, 0.9), (1, 1, 0.9), (2, 2, 0.9), (3, 0, 0.9)], [{Tags.OK, Tags.BAD_SHIFTING}, {Tags.OK, Tags.BAD_SHIFTING}, {Tags.OK, Tags.BAD_SHIFTING}, {Tags.OK, Tags.BAD_SHIFTING}]), + # node inserted, so should not be marked as shifted + (["A", "X", "B"], ["B", "A"], [(0, 1, 0.9), (1, None, None), (2, 0, 0.9)], [{Tags.OK, Tags.BAD_SHIFTING}, {Tags.BAD_INSERTION}, {Tags.OK, Tags.BAD_SHIFTING}]), # node deleted, nothing to mark as shifted - (["A", "B"], ["B", "X", "A"], [(0, 2), (None, 1), (1, 0)], [{Tags.OK, Tags.BAD_SHIFTING, Tags.BAD_DELETION_RIGHT}, {Tags.OK, Tags.BAD_SHIFTING, Tags.BAD_DELETION_LEFT}]), + (["A", "B"], ["B", "X", "A"], [(0, 2, 0.9), (None, 1, None), (1, 0, 0.9)], [{Tags.OK, Tags.BAD_SHIFTING, Tags.BAD_DELETION_RIGHT}, {Tags.OK, Tags.BAD_SHIFTING, Tags.BAD_DELETION_LEFT}]), ]) def test_single_error_shifted( self, @@ -183,8 +182,67 @@ def test_single_error_shifted( mt_pe_alignments: List[Tuple[int, int]], true_mt_tags: List[Set[StrEnum]], ) -> None: - tagger = NameTBDTagger() predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] assert len(predicted_tags) == len(true_mt_tags) for predicted_tags, true_tags in zip(predicted_tags, true_mt_tags): assert predicted_tags == {t.value for t 
in true_tags} + + +class TestTagsToSource: + @pytest.mark.parametrize("src_tokens, mt_tokens, mt_pe_alignments, mt_tags, true_src_tags", [ + # ok cases + (["A", "B"], ["A", "B"], [(0, 0, 0.9), (1, 1, 0.9)], [{Tags.OK}, {Tags.OK}], [{Tags.OK}, {Tags.OK}]), + (["A", "B", "C", "D"], ["A", "B", "C", "D"], [(0, 0, 0.9), (1, 1, 0.9), (2, 2, 0.9), (3, 3, 0.9)], [{Tags.OK}, {Tags.OK}, {Tags.OK}, {Tags.OK}], [{Tags.OK}, {Tags.OK}, {Tags.OK}, {Tags.OK}]), + ([], [], [], [], []), + # substitution cases + (["A", "B"], ["A", "C"], [(0, 0, 0.9), (1, 1, 0.9)], [{Tags.OK}, {Tags.BAD_SUBSTITUTION}], [{Tags.OK}, {Tags.BAD_SUBSTITUTION}]), + (["A", "B", "C", "D"], ["A", "B", "X", "D"], [(0, 0, 0.9), (1, 1, 0.9), (2, 2, 0.1), (3, 3, 0.9)], [{Tags.OK}, {Tags.OK}, {Tags.BAD_SUBSTITUTION}, {Tags.OK}], [{Tags.OK}, {Tags.OK}, {Tags.BAD_SUBSTITUTION}, {Tags.OK}]), + # multiple tags + (["A", "B"], ["A", "C"], [(0, 0, 0.9), (1, 1, 0.9)], [{Tags.OK, Tags.BAD_SHIFTING}, {Tags.BAD_SUBSTITUTION, Tags.BAD_DELETION_RIGHT}], [{Tags.OK, Tags.BAD_SHIFTING}, {Tags.BAD_SUBSTITUTION, Tags.BAD_DELETION_RIGHT}]), + ]) + def test_one_to_one( + self, + src_tokens: List[str], + mt_tokens: List[str], + mt_pe_alignments: List[Tuple[int, int]], + mt_tags: List[Set[StrEnum]], + true_src_tags: List[Set[StrEnum]], + ) -> None: + predicted_tags = tagger.tags_to_source([src_tokens], [mt_tokens], [mt_pe_alignments], [[{i.value for i in t} for t in mt_tags]])[0] + assert len(predicted_tags) == len(true_src_tags) + for predicted_tags, true_tags in zip(predicted_tags, true_src_tags): + assert predicted_tags == {t.value for t in true_tags} + + @pytest.mark.parametrize("src_tokens, mt_tokens, mt_pe_alignments, mt_tags, true_src_tags", [ + (["A"], ["A", "B"], [(0, 0, 0.9), (None, 1, None)], [{Tags.OK}, {Tags.BAD_SUBSTITUTION}], [{Tags.OK}]), + (["A", "B"], ["A", "B", "C"], [(0, 0, 0.9), (1, 1, 0.9), (None, 2, None)], [{Tags.BAD_SUBSTITUTION}, {Tags.OK}, {Tags.OK}], [{Tags.BAD_SUBSTITUTION}, {Tags.OK}]), + ]) + def 
test_src_deleted( + self, + src_tokens: List[str], + mt_tokens: List[str], + mt_pe_alignments: List[Tuple[int, int]], + mt_tags: List[Set[StrEnum]], + true_src_tags: List[Set[StrEnum]], + ) -> None: + predicted_tags = tagger.tags_to_source([src_tokens], [mt_tokens], [mt_pe_alignments], [[{i.value for i in t} for t in mt_tags]])[0] + assert len(predicted_tags) == len(true_src_tags) + for predicted_tags, true_tags in zip(predicted_tags, true_src_tags): + assert predicted_tags == {t.value for t in true_tags} + + @pytest.mark.parametrize("src_tokens, mt_tokens, mt_pe_alignments, mt_tags, true_src_tags", [ + (["A", "B", "C"], ["A", "B"], [(0, 0, 0.9), (1, 1, 0.9), (2, None, None)], [{Tags.BAD_SUBSTITUTION}, {Tags.OK}], [{Tags.BAD_SUBSTITUTION}, {Tags.OK}, set()]), + (["A", "B", "C", "D"], ["B"], [(0, None, None), (1, 0, 0.9), (2, None, None), (3, None, None)], [{Tags.BAD_SUBSTITUTION}], [set(), {Tags.BAD_SUBSTITUTION}, set(), set()]), + ]) + def test_mt_deleted( + self, + src_tokens: List[str], + mt_tokens: List[str], + mt_pe_alignments: List[Tuple[int, int]], + mt_tags: List[Set[StrEnum]], + true_src_tags: List[Set[StrEnum]], + ) -> None: + predicted_tags = tagger.tags_to_source([src_tokens], [mt_tokens], [mt_pe_alignments], [[{i.value for i in t} for t in mt_tags]])[0] + assert len(predicted_tags) == len(true_src_tags) + for predicted_tags, true_tags in zip(predicted_tags, true_src_tags): + assert predicted_tags == {t.value for t in true_tags} From f9e375c4d79cbf8275d27f85eda53e26a729b0a9 Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Thu, 4 May 2023 14:44:51 +0200 Subject: [PATCH 05/23] fix: filter None similarities when check deletions --- divemt/qe_taggers.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/divemt/qe_taggers.py b/divemt/qe_taggers.py index 094439b..4a8bc29 100644 --- a/divemt/qe_taggers.py +++ b/divemt/qe_taggers.py @@ -549,28 +549,28 @@ def tags_from_edits( # 1-n match for mt_node_id, 
connected_pe_nodes_ids, connected_pe_similarity in NameTBDTagger._group_by_node(mt_pe_sent_align, by_start_node=True, sort=True): if mt_node_id is not None and len(connected_pe_nodes_ids) > 1: - if all(sim < threshold for sim in connected_pe_similarity): + if all(sim < threshold for sim in connected_pe_similarity if sim is not None): continue - if all(sim > threshold for sim in connected_pe_similarity): + if all(sim > threshold for sim in connected_pe_similarity if sim is not None): continue aligns_remove_1_to_n.update([ (mt_node_id, pe_node_id, sim) for pe_node_id, sim in zip(connected_pe_nodes_ids, connected_pe_similarity) - if sim < threshold + if pe_node_id is not None and sim is not None and sim < threshold ]) # remove selected aligns and add None connected nodes instead mt_pe_sent_align = [(None, align[1], None) if align in aligns_remove_1_to_n else align for align in mt_pe_sent_align] # n-1 match for pe_node_id, connected_mt_nodes_ids, connected_mt_similarity in NameTBDTagger._group_by_node(mt_pe_sent_align, by_start_node=False, sort=True): if pe_node_id is not None and len(connected_mt_nodes_ids) > 1: - if all(sim < threshold for sim in connected_mt_similarity): + if all(sim < threshold for sim in connected_mt_similarity if sim is not None): continue - if all(sim > threshold for sim in connected_mt_similarity): + if all(sim > threshold for sim in connected_mt_similarity if sim is not None): continue aligns_remove_n_to_1.update([ (mt_node_id, pe_node_id, sim) for mt_node_id, sim in zip(connected_mt_nodes_ids, connected_mt_similarity) - if sim < threshold + if mt_node_id is not None and sim is not None and sim < threshold ]) # remove selected aligns and add None connected nodes instead mt_pe_sent_align = [(align[0], None, None) if align in aligns_remove_n_to_1 else align for align in mt_pe_sent_align] From 340bb8c873ea85f9188384124e566c564f5ca97f Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Fri, 5 May 2023 20:42:21 +0200 Subject: [PATCH 06/23] 
feat: function cache --- divemt/cache_utils.py | 140 ++++++++++++++++++++++++++++++++++++++++++ divemt/tag_utils.py | 15 +++-- 2 files changed, 149 insertions(+), 6 deletions(-) create mode 100644 divemt/cache_utils.py diff --git a/divemt/cache_utils.py b/divemt/cache_utils.py new file mode 100644 index 0000000..1428ed1 --- /dev/null +++ b/divemt/cache_utils.py @@ -0,0 +1,140 @@ +""" +The hashing idea adapted from https://death.andgravity.com/stable-hashing +https://github.com/lemon24/reader/blob/1efcd38c78f70dcc4e0d279e0fa2a0276749111e/src/reader/_hash_utils.py +""" +import dataclasses +import datetime +import functools +import hashlib +import inspect +import json +import pickle +from collections.abc import Collection +from pathlib import Path +from typing import Optional, Any, Dict, Callable + +import pandas as pd + + +_VERSION = 0 +_EXCLUDE = "_hash_exclude_" + + +def _json_dumps(thing: object) -> str: + return json.dumps( + thing, + default=_json_default, # force formatting-related options to known values + ensure_ascii=False, + sort_keys=True, + indent=None, + separators=(",", ":"), + ) + + +def _json_default(thing: object) -> Any: + try: + return _dataclass_dict(thing) + except TypeError: + pass + if isinstance(thing, datetime.datetime): + return thing.isoformat(timespec="microseconds") + raise TypeError(f"Object of type {type(thing).__name__} is not JSON serializable") + + +def _dataclass_dict(thing: object) -> Dict[str, Any]: + # we could have used dataclasses.asdict() + # with a dict_factory that drops empty values, + # but asdict() is recursive and we need to intercept and check + # the _hash_exclude_ of nested dataclasses; + # this way, json.dumps() does the recursion instead of asdict() + + # raises TypeError for non-dataclasses + fields = dataclasses.fields(thing) + # ... 
but doesn't for dataclass *types* + if isinstance(thing, type): + raise TypeError("got type, expected instance") + + exclude = getattr(thing, _EXCLUDE, ()) + + rv = {} + for field in fields: + if field.name in exclude: + continue + + value = getattr(thing, field.name) + if value is None or not value and isinstance(value, Collection): + continue + + rv[field.name] = value + + return rv + + +def calc_obj_hash(obj: object) -> bytes: + """Calculate hash of a single object""" + prefix = _VERSION.to_bytes(1, 'big') + hash_object = hashlib.sha256() + hash_object.update(_json_dumps(obj).encode("utf-8")) + return prefix + hash_object.digest() + + +def calc_args_hash(*args: Any, **kwargs: any) -> bytes: + """Calculate hash of arguments to function""" + prefix = _VERSION.to_bytes(1, 'big') + hash_object = hashlib.sha256() + for arg in args: + if isinstance(arg, pd.DataFrame) or isinstance(arg, pd.Series): + hash_object.update(str(pd.util.hash_pandas_object(arg).sum()).encode("utf-8")) + else: + hash_object.update(_json_dumps(arg).encode("utf-8")) + for key, value in kwargs.items(): + if isinstance(value, pd.DataFrame) or isinstance(value, pd.Series): + hash_object.update(key.encode("utf-8") + str(pd.util.hash_pandas_object(value).sum()).encode("utf-8")) + else: + hash_object.update(_json_dumps([key, value]).encode("utf-8")) + return prefix + hash_object.digest() + + +class CacheDecorator: + def __init__(self, cache_dir: Optional[Path] = None, version: int = 0): + self.version = version + self.cache_dir = cache_dir or Path(".cache") + + @staticmethod + def _is_bound_method(function: Callable, arg: Any): + return inspect.ismethod(function) or (hasattr(arg, "__class__") and function.__name__ in dir(arg.__class__)) + + def __call__(self, function: Callable) -> Any: + @functools.wraps(function) + def wrapper(*args: Any, **kwargs: Any) -> Any: + cache_key_args = args[1:] if self._is_bound_method(function, args[0]) else args + hash_val = calc_args_hash(*cache_key_args, **kwargs) + 
cache_file = self.cache_dir / f"{function.__name__}_v{self.version}_{hash_val.hex()}.pkl" + + # TODO: add logging, not printing + + if cache_file.exists(): + print(f"LOADING CACHE: {cache_file}") + with open(cache_file, "rb") as f: + return pickle.load(f) + else: + print(len(args), len(kwargs.items())) + result = function(*args, **kwargs) + print(f"CREATE CACHE: {cache_file}") + cache_file.parent.mkdir(parents=True, exist_ok=True) + with open(cache_file, "wb") as f: + pickle.dump(result, f) + return result + + return wrapper + + def __get__(self, instance, owner): + """note: adapted from chat-gpt-4 =)""" + # Support method decorators for class instances + if instance is None: + return self + + # Bind the decorated method to the instance + bound_method = functools.partial(self, instance) + + return bound_method diff --git a/divemt/tag_utils.py b/divemt/tag_utils.py index 71d4bdc..41189df 100644 --- a/divemt/tag_utils.py +++ b/divemt/tag_utils.py @@ -7,6 +7,8 @@ import stanza from tqdm import tqdm +from divemt.cache_utils import CacheDecorator + _STANZA_NLP_MAP = { "eng": {"lang": "en", "processors": "tokenize,pos,depparse,ner,lemma"}, "ara": {"lang": "ar", "processors": "tokenize,pos,depparse,ner,lemma,mwt"}, @@ -108,15 +110,14 @@ def get_tokens_annotations(text: Optional[str], lang: str) -> Tuple[Optional[Lis return tokens, annotations +@CacheDecorator() def texts2annotations(data: pd.DataFrame, unit_id_contains_lang: bool = True) -> pd.DataFrame: if "lang_id" not in data.columns and unit_id_contains_lang: data["lang_id"] = data.unit_id.str.split("-").map(lambda x: x[2]) - src_tokens = [] - src_annotations = [] - mt_tokens = [] - mt_annotations = [] - tgt_tokens = [] - tgt_annotations = [] + + src_tokens, mt_tokens, tgt_tokens = [], [], [] + src_annotations, mt_annotations, tgt_annotations = [], [], [] + for _i, row in tqdm(data.iterrows(), desc="Adding Stanza annotations...", total=len(data)): src_tok, src_ann = get_tokens_annotations(row.src_text, "eng") mt_tok, 
mt_ann = get_tokens_annotations(row.mt_text, row.lang_id) @@ -133,5 +134,7 @@ def texts2annotations(data: pd.DataFrame, unit_id_contains_lang: bool = True) -> data["mt_annotations"] = mt_annotations data["tgt_tokens"] = tgt_tokens data["tgt_annotations"] = tgt_annotations + clear_nlp_cache() + return data From 3926a2c2167b7d5947d6640bfe45b5f8d67ac308 Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Fri, 5 May 2023 20:43:20 +0200 Subject: [PATCH 07/23] feat: function cache --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index eab2b88..6bfc3dd 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,8 @@ data/raw/vie/*/* tmp/* outputs/* +cache/ +.cache/ .idea/ From 4e90560e57d5fdadba17e61367a77efd8fa4ce72 Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Fri, 5 May 2023 20:44:57 +0200 Subject: [PATCH 08/23] chore: add new comment for added material --- divemt/custom_simalign.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/divemt/custom_simalign.py b/divemt/custom_simalign.py index 77a9559..a7a4a7a 100644 --- a/divemt/custom_simalign.py +++ b/divemt/custom_simalign.py @@ -268,11 +268,13 @@ def get_word_aligns(self, src_sent: Union[str, List[str]], trg_sent: Union[str, for ext in self.matching_methods: if all_mats[ext][i, j] > 0: if self.token_type == "bpe": + # new: add similarity as third item if self.return_similarity: aligns[ext].add((l1_b2w_map[i], l2_b2w_map[j], words_similarity[l1_b2w_map[i], l2_b2w_map[j]])) else: aligns[ext].add((l1_b2w_map[i], l2_b2w_map[j])) else: + # new: add similarity as third item if self.return_similarity: aligns[ext].add((i, j, sim[i, j])) else: From bce4ac19c4d7285af5369b56354d94e289bb3100 Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Tue, 9 May 2023 13:12:23 +0200 Subject: [PATCH 09/23] chore: cmd line argument to run augmentation --- README.md | 1 + divemt/parse_utils.py | 11 +++++++---- divemt/qe_taggers.py | 33 +++++++++++++++++++++++++++++++++ 
scripts/preprocess.py | 6 ++++++ 4 files changed, 47 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 9f51916..a44050d 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,7 @@ python scripts/preprocess.py \ --add_extra \ --add_annotations \ --add_wmt22_quality_tags \ +--add_name_tbd_quality_tags \ --output_single \ --output_merged_subjects \ --output_merged_languages diff --git a/divemt/parse_utils.py b/divemt/parse_utils.py index 0a1d291..8d15935 100644 --- a/divemt/parse_utils.py +++ b/divemt/parse_utils.py @@ -20,7 +20,7 @@ from .cer import cer from .tag_utils import clear_nlp_cache, texts2annotations, tokenize -from .qe_taggers import QETagger, WMT22QETagger # isort: skip <- due to circular import with tag_utils +from .qe_taggers import QETagger, WMT22QETagger, NameTBDTagger # isort: skip <- due to circular import with tag_utils logger = logging.getLogger(__name__) @@ -356,7 +356,7 @@ def texts2qe( pe_texts["mt_text"].tolist(), pe_texts["tgt_text"].tolist(), "eng", - pe_texts.unit_id.str.split("-").map(lambda x: x[2]), + pe_texts.unit_id.str.split("-").map(lambda x: x[2]).tolist(), ) pe_texts[f"src_{tagger.ID}"] = src_tags pe_texts[f"mt_{tagger.ID}"] = mt_tags @@ -377,12 +377,12 @@ def parse_from_folder( add_extra_information: bool = False, add_annotations_information: bool = False, add_wmt22_quality_tags: bool = False, + add_name_tbd_quality_tags: bool = False, rounding: Optional[int] = None, ) -> Union[pd.DataFrame, Tuple[pd.DataFrame, pd.DataFrame]]: """Parse all .per XML files in a folder and return a single dataframe containing all units.""" metrics_list_dfs = [per2metrics(os.path.join(path, f)) for f in os.listdir(path) if f.endswith(".per")] metrics_df = pd.concat([df for df in metrics_list_dfs if df is not None], ignore_index=True) - if ( output_texts or add_edit_information @@ -407,10 +407,13 @@ def parse_from_folder( if add_extra_information: metrics_df = metrics2extra(metrics_df) if add_annotations_information: - texts_df = 
texts2annotations(texts_df) + texts_df = texts2annotations(texts_df) # TODO: make cache optional if add_wmt22_quality_tags: tagger = WMT22QETagger() texts_df = texts2qe(texts_df, tagger) + if add_name_tbd_quality_tags: + tagger = NameTBDTagger() # TODO: make cache optional + texts_df = texts2qe(texts_df, tagger) if time_ordered: if output_texts: diff --git a/divemt/qe_taggers.py b/divemt/qe_taggers.py index 4a8bc29..557f56a 100644 --- a/divemt/qe_taggers.py +++ b/divemt/qe_taggers.py @@ -10,6 +10,9 @@ from xml.sax.saxutils import escape from simalign import SentenceAligner + +from .cache_utils import CacheDecorator + if sys.version_info < (3, 11): from strenum import StrEnum else: @@ -397,8 +400,10 @@ def generate_tags( src_tokens, src_langs = self.get_tokenized(srcs, src_langs) mt_tokens, tgt_langs = self.get_tokenized(mts, tgt_langs) pe_tokens, _ = self.get_tokenized(pes, tgt_langs) + src_pe_alignments = self.align_source_pe(src_tokens, pe_tokens, tgt_langs) mt_pe_alignments = self.align_mt_pe(mt_tokens, pe_tokens) + mt_tags = self.tags_from_edits(mt_tokens, pe_tokens, mt_pe_alignments, use_gaps, omissions) src_tags = self.tags_to_source( src_tokens, @@ -408,7 +413,9 @@ def generate_tags( mt_pe_alignments, fluency_rule, ) + clear_nlp_cache() + return src_tags, mt_tags @@ -436,6 +443,27 @@ def __init__( ): self.aligner = aligner if aligner else CustomSentenceAligner(model="bert", token_type="bpe", matching_methods="mai", return_similarity="avg") + def _fill_deleted_inserted_tokens(self, len_from: int, len_to: int, alignments: List[TAlignment]) -> List[TAlignment]: + """As aligner provides only actual alignments, add required (None, i), (i, None) tokens""" + new_alignments: List[TAlignment] = [] + + # Add (i, None) in correct place (ordered by i) + current_alignment_index = 0 + for align in alignments: + # Add missing index pairs with None + while current_alignment_index < align[0]: + new_alignments.append((current_alignment_index, None)) + current_alignment_index 
+= 1 + + # Add the current alignment pair + new_alignments.append(align) + current_alignment_index += 1 + + raise NotImplementedError() + + return new_alignments + + # @CacheDecorator() def align_source_mt( self, src_tokens: List[List[str]], @@ -450,6 +478,7 @@ def align_source_mt( ) ] + # @CacheDecorator() def align_mt_pe( self, mt_tokens: List[List[str]], @@ -701,11 +730,15 @@ def generate_tags( src_tokens, src_langs = self.get_tokenized(srcs, src_langs) mt_tokens, tgt_langs = self.get_tokenized(mts, tgt_langs) pe_tokens, _ = self.get_tokenized(pes, tgt_langs) + src_mt_alignments = self.align_source_mt(src_tokens, mt_tokens, src_langs, tgt_langs) mt_pe_alignments = self.align_mt_pe(mt_tokens, pe_tokens, tgt_langs) + mt_tags = self.tags_from_edits(mt_tokens, pe_tokens, mt_pe_alignments) src_tags = self.tags_to_source( src_tokens, pe_tokens, src_mt_alignments, mt_tags ) + clear_nlp_cache() + return src_tags, mt_tags diff --git a/scripts/preprocess.py b/scripts/preprocess.py index ceab942..656feec 100644 --- a/scripts/preprocess.py +++ b/scripts/preprocess.py @@ -54,6 +54,7 @@ def preprocess(args: argparse.Namespace): add_extra_information=args.add_extra, add_annotations_information=args.add_annotations, add_wmt22_quality_tags=args.add_wmt22_quality_tags, + add_name_tbd_quality_tags=args.add_name_tbd_quality_tags, rounding=args.rounding, ) if args.output_texts: @@ -183,6 +184,11 @@ def preprocess(args: argparse.Namespace): action="store_true", help="Whether to add WMT22 quality tags to the text dataframe", ) + parser.add_argument( + "--add_name_tbd_quality_tags", + action="store_true", + help="Whether to add NameTBD quality tags to the text dataframe", + ) parser.add_argument( "--output_single", action="store_true", From 1d69f9afa4f374e8b98e444f71288c43f535df15 Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Tue, 9 May 2023 13:22:44 +0200 Subject: [PATCH 10/23] style: apply black and ruff --- divemt/cache_utils.py | 11 +- divemt/custom_simalign.py | 28 +- 
divemt/qe_taggers.py | 142 +++--- scripts/preprocess.py | 4 +- tests/test_qe_taggers_name_tbd_tagger.py | 601 ++++++++++++++++------- 5 files changed, 522 insertions(+), 264 deletions(-) diff --git a/divemt/cache_utils.py b/divemt/cache_utils.py index 1428ed1..c9ed2b4 100644 --- a/divemt/cache_utils.py +++ b/divemt/cache_utils.py @@ -11,11 +11,10 @@ import pickle from collections.abc import Collection from pathlib import Path -from typing import Optional, Any, Dict, Callable +from typing import Any, Callable, Dict, Optional import pandas as pd - _VERSION = 0 _EXCLUDE = "_hash_exclude_" @@ -72,7 +71,7 @@ def _dataclass_dict(thing: object) -> Dict[str, Any]: def calc_obj_hash(obj: object) -> bytes: """Calculate hash of a single object""" - prefix = _VERSION.to_bytes(1, 'big') + prefix = _VERSION.to_bytes(1, "big") hash_object = hashlib.sha256() hash_object.update(_json_dumps(obj).encode("utf-8")) return prefix + hash_object.digest() @@ -80,15 +79,15 @@ def calc_obj_hash(obj: object) -> bytes: def calc_args_hash(*args: Any, **kwargs: any) -> bytes: """Calculate hash of arguments to function""" - prefix = _VERSION.to_bytes(1, 'big') + prefix = _VERSION.to_bytes(1, "big") hash_object = hashlib.sha256() for arg in args: - if isinstance(arg, pd.DataFrame) or isinstance(arg, pd.Series): + if isinstance(arg, (pd.DataFrame, pd.Series)): hash_object.update(str(pd.util.hash_pandas_object(arg).sum()).encode("utf-8")) else: hash_object.update(_json_dumps(arg).encode("utf-8")) for key, value in kwargs.items(): - if isinstance(value, pd.DataFrame) or isinstance(value, pd.Series): + if isinstance(value, (pd.DataFrame, pd.Series)): hash_object.update(key.encode("utf-8") + str(pd.util.hash_pandas_object(value).sum()).encode("utf-8")) else: hash_object.update(_json_dumps([key, value]).encode("utf-8")) diff --git a/divemt/custom_simalign.py b/divemt/custom_simalign.py index a7a4a7a..579c402 100644 --- a/divemt/custom_simalign.py +++ b/divemt/custom_simalign.py @@ -7,7 +7,7 @@ - fix 
typings - logger deleted """ -from typing import Dict, List, Tuple, Union, Optional +from typing import Dict, List, Optional, Tuple, Union import numpy as np from scipy.sparse import csr_matrix @@ -20,21 +20,21 @@ nx = None import torch from transformers import ( + AutoConfig, + AutoModel, + AutoTokenizer, BertModel, BertTokenizer, - XLMModel, - XLMTokenizer, RobertaModel, RobertaTokenizer, + XLMModel, XLMRobertaModel, XLMRobertaTokenizer, - AutoConfig, - AutoModel, - AutoTokenizer, + XLMTokenizer, ) -class EmbeddingLoader(object): +class EmbeddingLoader: def __init__(self, model: str = "bert-base-multilingual-cased", device=torch.device("cpu"), layer: int = 8): TR_Models = { "bert-base-uncased": (BertModel, BertTokenizer), @@ -89,14 +89,16 @@ def get_embed_list(self, sent_batch: List[List[str]]) -> torch.Tensor: return None -class SentenceAligner(object): +class SentenceAligner: def __init__( self, model: str = "bert", token_type: str = "bpe", distortion: float = 0.0, matching_methods: str = "mai", - return_similarity: Optional[str] = None, # new: ["max", "avg"] type of average similarity for words from tokens + return_similarity: Optional[ + str + ] = None, # new: ["max", "avg"] type of average similarity for words from tokens device: str = "cpu", layer: int = 8, ): @@ -145,14 +147,14 @@ def average_embeds_over_words(bpe_vectors: np.ndarray, word_tokens_pair: List[Li w2b_map.append([]) for wlist in word_tokens_pair[0]: w2b_map[0].append([]) - for x in wlist: + for _x in wlist: w2b_map[0][-1].append(cnt) cnt += 1 cnt = 0 w2b_map.append([]) for wlist in word_tokens_pair[1]: w2b_map[1].append([]) - for x in wlist: + for _x in wlist: w2b_map[1][-1].append(cnt) cnt += 1 @@ -270,7 +272,9 @@ def get_word_aligns(self, src_sent: Union[str, List[str]], trg_sent: Union[str, if self.token_type == "bpe": # new: add similarity as third item if self.return_similarity: - aligns[ext].add((l1_b2w_map[i], l2_b2w_map[j], words_similarity[l1_b2w_map[i], l2_b2w_map[j]])) + 
aligns[ext].add( + (l1_b2w_map[i], l2_b2w_map[j], words_similarity[l1_b2w_map[i], l2_b2w_map[j]]) + ) else: aligns[ext].add((l1_b2w_map[i], l2_b2w_map[j])) else: diff --git a/divemt/qe_taggers.py b/divemt/qe_taggers.py index 557f56a..924a9cb 100644 --- a/divemt/qe_taggers.py +++ b/divemt/qe_taggers.py @@ -6,13 +6,11 @@ from collections import defaultdict from itertools import groupby from pathlib import Path -from typing import List, Optional, Tuple, Union, Set, Generator, Any +from typing import Any, Generator, List, Optional, Set, Tuple, Union from xml.sax.saxutils import escape from simalign import SentenceAligner -from .cache_utils import CacheDecorator - if sys.version_info < (3, 11): from strenum import StrEnum else: @@ -121,8 +119,8 @@ def generate_tags( (one per machine translation). Returns: - `Tuple[List[TTag], List[TTag]]`: A tuple containing the lists of quality tags for all source and the machine - translation sentence, respectively. + `Tuple[List[TTag], List[TTag]]`: A tuple containing the lists of quality tags for all source and the + machine translation sentence, respectively. 
""" pass @@ -421,17 +419,18 @@ def generate_tags( class NameTBDGeneralTags(StrEnum): """Error types tags for NameTBD.""" - OK = 'OK' # 1:1 - the MT uses the same single word as the PE - BAD_SUBSTITUTION = 'BAD-SUB' # 1:1 - the MT uses a different single word than the PE - BAD_DELETION_RIGHT = 'BAD-DEL-R' # None:1 - the MT does not have a word existed in PE, deletion on the right - BAD_DELETION_LEFT = 'BAD-DEL-L' # None:1 - the MT does not have a word existed in PE, deletion on the left - BAD_INSERTION = 'BAD-INS' # 1:None - the MT wrongly inserted a words that is not in the PE + OK = "OK" # 1:1 - the MT uses the same single word as the PE + BAD_SUBSTITUTION = "BAD-SUB" # 1:1 - the MT uses a different single word than the PE + + BAD_DELETION_RIGHT = "BAD-DEL-R" # None:1 - the MT does not have a word existed in PE, deletion on the right + BAD_DELETION_LEFT = "BAD-DEL-L" # None:1 - the MT does not have a word existed in PE, deletion on the left + BAD_INSERTION = "BAD-INS" # 1:None - the MT wrongly inserted a words that is not in the PE - BAD_SHIFTING = 'BAD-SHF' # for any number of tokens - detect crossing edges + BAD_SHIFTING = "BAD-SHF" # for any number of tokens - detect crossing edges - BAD_CONTRACTION = 'BAD-CON' # 1:n - the MT uses a single word instead of multiple words in the PE - BAD_EXPANSION = 'BAD-EXP' # n:1 - the MT uses a multiple words instead of one in the PE + BAD_CONTRACTION = "BAD-CON" # 1:n - the MT uses a single word instead of multiple words in the PE + BAD_EXPANSION = "BAD-EXP" # n:1 - the MT uses a multiple words instead of one in the PE class NameTBDTagger(QETagger): @@ -441,9 +440,15 @@ def __init__( self, aligner: Optional[CustomSentenceAligner] = None, ): - self.aligner = aligner if aligner else CustomSentenceAligner(model="bert", token_type="bpe", matching_methods="mai", return_similarity="avg") + self.aligner = ( + aligner + if aligner + else CustomSentenceAligner(model="bert", token_type="bpe", matching_methods="mai", 
return_similarity="avg") + ) - def _fill_deleted_inserted_tokens(self, len_from: int, len_to: int, alignments: List[TAlignment]) -> List[TAlignment]: + def _fill_deleted_inserted_tokens( + self, len_from: int, len_to: int, alignments: List[TAlignment] + ) -> List[TAlignment]: """As aligner provides only actual alignments, add required (None, i), (i, None) tokens""" new_alignments: List[TAlignment] = [] @@ -473,9 +478,7 @@ def align_source_mt( ) -> List[List[TAlignment]]: return [ self.aligner.get_word_aligns(src_tok, mt_tok)["inter"] - for src_tok, mt_tok in tqdm( - zip(src_tokens, mt_tokens), total=len(src_tokens), desc="Aligning src-mt" - ) + for src_tok, mt_tok in tqdm(zip(src_tokens, mt_tokens), total=len(src_tokens), desc="Aligning src-mt") ] # @CacheDecorator() @@ -487,23 +490,27 @@ def align_mt_pe( ) -> List[List[TAlignment]]: return [ self.aligner.get_word_aligns(mt_tok, pe_tok)["inter"] - for mt_tok, pe_tok in tqdm( - zip(mt_tokens, pe_tokens), total=len(mt_tokens), desc="Aligning mt-pe" - ) + for mt_tok, pe_tok in tqdm(zip(mt_tokens, pe_tokens), total=len(mt_tokens), desc="Aligning mt-pe") ] @staticmethod - def _group_by_node(alignments: List[Tuple[Optional[int], Optional[int]]], by_start_node: bool = True, sort: bool = False) -> Generator[Tuple[int, List[int], List[float]], None, None]: + def _group_by_node( + alignments: List[Tuple[Optional[int], Optional[int]]], by_start_node: bool = True, sort: bool = False + ) -> Generator[Tuple[int, List[int], List[float]], None, None]: """Yield a node id and a list of connected nodes.""" _by_index = 0 if by_start_node else 1 if sort: alignments = sorted(alignments, key=lambda x: x[_by_index] if x[_by_index] is not None else -1) for start_node, connected_alignments in groupby(alignments, lambda x: x[_by_index]): connected_alignments = list(connected_alignments) - yield start_node, [end_id if by_start_node else start_id for start_id, end_id, _ in connected_alignments], [similarity for _, _, similarity in 
connected_alignments] + yield start_node, [ + end_id if by_start_node else start_id for start_id, end_id, _ in connected_alignments + ], [similarity for _, _, similarity in connected_alignments] @staticmethod - def _detect_crossing_edges(mt_tokens: List[str], pe_tokens: List[str], alignments: List[Tuple[Optional[int], Optional[int], float]]) -> List[bool]: + def _detect_crossing_edges( + mt_tokens: List[str], pe_tokens: List[str], alignments: List[Tuple[Optional[int], Optional[int], float]] + ) -> List[bool]: """Detect crossing edges in the alignments. Return mask list of nodes that cross some other node.""" # TODO: optimize from n^2 to n as 2 pointers shifted_mt_mask = [False] * len(mt_tokens) @@ -537,7 +544,7 @@ def tags_from_edits( mt_pe_alignments: List[List[TAlignment]], threshold: float = 0.8, ) -> List[List[TTag]]: - """ Produce tags on MT tokens from edits found in the PE tokens. + """Produce tags on MT tokens from edits found in the PE tokens. Note: The tags indicate the type of error particular MT token is affected by. @@ -568,44 +575,60 @@ def tags_from_edits( mt_tags: List[List[Set[str]]] = [] - for mt_sent_tok, pe_sent_tok, mt_pe_sent_align in tqdm(zip(mt_tokens, pe_tokens, mt_pe_alignments), desc="Tagging MT", total=len(mt_tokens)): - + for mt_sent_tok, pe_sent_tok, mt_pe_sent_align in tqdm( + zip(mt_tokens, pe_tokens, mt_pe_alignments), desc="Tagging MT", total=len(mt_tokens) + ): mt_sent_tags: List[Set[str]] = [set() for _ in range(len(mt_sent_tok))] # clear 1-n and n-1 nodes with low threshold - # e.g. if 1-n or n-1 have same token or high similarity, remove low similarity as deletions/insertions (None:1 and 1:None) + # e.g. 
if 1-n or n-1 have same token or high similarity, remove low similarity as deletions/insertions + # (None:1 and 1:None) aligns_remove_1_to_n, aligns_remove_n_to_1 = set(), set() # 1-n match - for mt_node_id, connected_pe_nodes_ids, connected_pe_similarity in NameTBDTagger._group_by_node(mt_pe_sent_align, by_start_node=True, sort=True): + for mt_node_id, connected_pe_nodes_ids, connected_pe_similarity in NameTBDTagger._group_by_node( + mt_pe_sent_align, by_start_node=True, sort=True + ): if mt_node_id is not None and len(connected_pe_nodes_ids) > 1: if all(sim < threshold for sim in connected_pe_similarity if sim is not None): continue if all(sim > threshold for sim in connected_pe_similarity if sim is not None): continue - aligns_remove_1_to_n.update([ - (mt_node_id, pe_node_id, sim) - for pe_node_id, sim in zip(connected_pe_nodes_ids, connected_pe_similarity) - if pe_node_id is not None and sim is not None and sim < threshold - ]) + aligns_remove_1_to_n.update( + [ + (mt_node_id, pe_node_id, sim) + for pe_node_id, sim in zip(connected_pe_nodes_ids, connected_pe_similarity) + if pe_node_id is not None and sim is not None and sim < threshold + ] + ) # remove selected aligns and add None connected nodes instead - mt_pe_sent_align = [(None, align[1], None) if align in aligns_remove_1_to_n else align for align in mt_pe_sent_align] + mt_pe_sent_align = [ + (None, align[1], None) if align in aligns_remove_1_to_n else align for align in mt_pe_sent_align + ] # n-1 match - for pe_node_id, connected_mt_nodes_ids, connected_mt_similarity in NameTBDTagger._group_by_node(mt_pe_sent_align, by_start_node=False, sort=True): + for pe_node_id, connected_mt_nodes_ids, connected_mt_similarity in NameTBDTagger._group_by_node( + mt_pe_sent_align, by_start_node=False, sort=True + ): if pe_node_id is not None and len(connected_mt_nodes_ids) > 1: if all(sim < threshold for sim in connected_mt_similarity if sim is not None): continue if all(sim > threshold for sim in connected_mt_similarity 
if sim is not None): continue - aligns_remove_n_to_1.update([ - (mt_node_id, pe_node_id, sim) - for mt_node_id, sim in zip(connected_mt_nodes_ids, connected_mt_similarity) - if mt_node_id is not None and sim is not None and sim < threshold - ]) + aligns_remove_n_to_1.update( + [ + (mt_node_id, pe_node_id, sim) + for mt_node_id, sim in zip(connected_mt_nodes_ids, connected_mt_similarity) + if mt_node_id is not None and sim is not None and sim < threshold + ] + ) # remove selected aligns and add None connected nodes instead - mt_pe_sent_align = [(align[0], None, None) if align in aligns_remove_n_to_1 else align for align in mt_pe_sent_align] + mt_pe_sent_align = [ + (align[0], None, None) if align in aligns_remove_n_to_1 else align for align in mt_pe_sent_align + ] # Solve all n-1: setup expansions tags and solve n-1 matches < threshold as smth+insertion - for pe_node_id, connected_mt_nodes_ids, _ in NameTBDTagger._group_by_node(mt_pe_sent_align, by_start_node=False, sort=True): + for pe_node_id, connected_mt_nodes_ids, _ in NameTBDTagger._group_by_node( + mt_pe_sent_align, by_start_node=False, sort=True + ): if pe_node_id is not None and len(connected_mt_nodes_ids) > 1: # expansion, mark related mt nodes for mt_node_id in connected_mt_nodes_ids: @@ -614,7 +637,9 @@ def tags_from_edits( # Solve all deletions, add deletion tags on left and right sides mt_position = 0 - for mt_node_id, connected_pe_nodes_ids, _ in NameTBDTagger._group_by_node(mt_pe_sent_align, by_start_node=True, sort=False): + for mt_node_id, connected_pe_nodes_ids, _ in NameTBDTagger._group_by_node( + mt_pe_sent_align, by_start_node=True, sort=False + ): if mt_node_id is None: # deleted word error, mark left and right modes if 0 <= mt_position - 1 < len(mt_sent_tags): @@ -627,7 +652,9 @@ def tags_from_edits( mt_pe_sent_align = [align for align in mt_pe_sent_align if align[0] is not None] # Solve all 1-n matches - for mt_node_id, connected_pe_nodes_ids, _ in 
NameTBDTagger._group_by_node(mt_pe_sent_align, by_start_node=True, sort=True): + for mt_node_id, connected_pe_nodes_ids, _ in NameTBDTagger._group_by_node( + mt_pe_sent_align, by_start_node=True, sort=True + ): assert mt_node_id is not None, "Already should be filtered all (None, smth) cases" if NameTBDGeneralTags.BAD_EXPANSION.value in mt_sent_tags[mt_node_id]: continue @@ -645,7 +672,9 @@ def tags_from_edits( mt_sent_tags[mt_node_id].add(NameTBDGeneralTags.OK.value) # Add shifted tags if so - for mt_node_id, mask in enumerate(NameTBDTagger._detect_crossing_edges(mt_sent_tok, pe_sent_tok, mt_pe_sent_align)): + for mt_node_id, mask in enumerate( + NameTBDTagger._detect_crossing_edges(mt_sent_tok, pe_sent_tok, mt_pe_sent_align) + ): if mask: mt_sent_tags[mt_node_id].add(NameTBDGeneralTags.BAD_SHIFTING.value) @@ -654,7 +683,7 @@ def tags_from_edits( # Basic sanity check assert all( - [len(mt_sent_tokens) == len(mt_sent_tags) for mt_sent_tokens, mt_sent_tags in zip(mt_tokens, mt_tags)] + len(mt_sent_tokens) == len(mt_sent_tags) for mt_sent_tokens, mt_sent_tags in zip(mt_tokens, mt_tags) ), "MT tags creation failed, number of tokens and tags do not match" return mt_tags @@ -665,7 +694,7 @@ def tags_to_source( src_mt_alignments: List[List[TAlignment]], mt_tags: List[List[Set[str]]], ) -> List[List[TTag]]: - """ Propagate tags from MT to source. + """Propagate tags from MT to source. 
# TODO: update docstring with the final logic The following cases are considered: @@ -683,12 +712,15 @@ def tags_to_source( src_tags: List[List[Set[str]]] = [] - for src_sent_tok, mt_sent_tok, mt_sent_tags, mt_pe_sent_align in tqdm(zip(src_tokens, mt_tokens, mt_tags, src_mt_alignments), desc="Transfer to source", total=len(src_tokens)): - + for src_sent_tok, _mt_sent_tok, mt_sent_tags, mt_pe_sent_align in tqdm( + zip(src_tokens, mt_tokens, mt_tags, src_mt_alignments), desc="Transfer to source", total=len(src_tokens) + ): src_sent_tags: List[Set[str]] = [set() for _ in range(len(src_sent_tok))] # Solve all as 1-n matches - for src_node_id, connected_mt_nodes_ids, connected_mt_similarity in NameTBDTagger._group_by_node(mt_pe_sent_align, by_start_node=True, sort=True): + for src_node_id, connected_mt_nodes_ids, connected_mt_similarity in NameTBDTagger._group_by_node( + mt_pe_sent_align, by_start_node=True, sort=True + ): if src_node_id is None: continue elif len(connected_mt_nodes_ids) == 0: @@ -716,7 +748,9 @@ def tags_to_source( src_tags.append(src_sent_tags) # Basic sanity checks - assert all(len(aa) == len(bb) for aa, bb in zip(src_tokens, src_tags)), "Source tags creation failed, number of tokens and tags do not match" + assert all( + len(aa) == len(bb) for aa, bb in zip(src_tokens, src_tags) + ), "Source tags creation failed, number of tokens and tags do not match" return src_tags def generate_tags( @@ -735,9 +769,7 @@ def generate_tags( mt_pe_alignments = self.align_mt_pe(mt_tokens, pe_tokens, tgt_langs) mt_tags = self.tags_from_edits(mt_tokens, pe_tokens, mt_pe_alignments) - src_tags = self.tags_to_source( - src_tokens, pe_tokens, src_mt_alignments, mt_tags - ) + src_tags = self.tags_to_source(src_tokens, pe_tokens, src_mt_alignments, mt_tags) clear_nlp_cache() diff --git a/scripts/preprocess.py b/scripts/preprocess.py index 656feec..0154afa 100644 --- a/scripts/preprocess.py +++ b/scripts/preprocess.py @@ -27,9 +27,7 @@ def preprocess(args: 
argparse.Namespace): for path in lang_output_paths.values(): os.makedirs(path, exist_ok=True) if args.tasks is None: - tasks = { - lang: list({f.split("_")[1] for f in os.listdir(lang_source_paths[lang])}) for lang in args.languages - } + tasks = {lang: list({f.split("_")[1] for f in os.listdir(lang_source_paths[lang])}) for lang in args.languages} else: tasks = {lang: args.tasks for lang in args.languages} results_dict = {lang: {task: [] for task in tasks[lang]} for lang in args.languages} diff --git a/tests/test_qe_taggers_name_tbd_tagger.py b/tests/test_qe_taggers_name_tbd_tagger.py index a087eba..0350f5f 100644 --- a/tests/test_qe_taggers_name_tbd_tagger.py +++ b/tests/test_qe_taggers_name_tbd_tagger.py @@ -1,248 +1,473 @@ import sys -from typing import List, Tuple, Set +from typing import List, Set, Tuple import pytest + if sys.version_info < (3, 11): from strenum import StrEnum else: from enum import StrEnum -from divemt.qe_taggers import NameTBDTagger from divemt.qe_taggers import NameTBDGeneralTags as Tags - +from divemt.qe_taggers import NameTBDTagger tagger = NameTBDTagger() class TestUtils: - @pytest.mark.parametrize("mt_len, mt_pe_alignments, true_mt_shifts_mask", [ - (1, [(0, 0, 0.9)], [False]), - (2, [(0, 0, 0.9), (1, 1, 0.9)], [False, False]), - (3, [(0, 0, 0.9), (1, 1, 0.9), (2, 2, 0.9)], [False, False, False]), - (3, [(0, 0, 0.9), (1, None, None), (2, 1, 0.9)], [False, False, False]), - # easiest case - (2, [(0, 1, 0.9), (1, 0, 0.9)], [True, True]), - # central one is not moved, but have crossing edges - (3, [(0, 2, 0.9), (1, 1, 0.9), (2, 0, 0.9)], [True, True, True]), - # the central one deleted, so not shifted, no crossing edges - (3, [(0, 1, 0.9), (1, None, None), (2, 0, 0.9)], [True, False, True]), - (4, [(0, 0, 0.9), (1, 3, 0.9), (1, 4, 0.9), (1, 5, 0.9), (2, 2, 0.9), (2, 0, 0.9), (3, None, None)], [False, True, True, False]), - ]) - def test_detect_crossing_edges(self, mt_len: int, mt_pe_alignments: List[Tuple[int, int]], true_mt_shifts_mask: 
List[bool]) -> None: - mt_shifts_mask = tagger._detect_crossing_edges([str(i) for i in range(mt_len)], [str(i) for i in range(mt_len)], mt_pe_alignments) + @pytest.mark.parametrize( + "mt_len, mt_pe_alignments, true_mt_shifts_mask", + [ + (1, [(0, 0, 0.9)], [False]), + (2, [(0, 0, 0.9), (1, 1, 0.9)], [False, False]), + (3, [(0, 0, 0.9), (1, 1, 0.9), (2, 2, 0.9)], [False, False, False]), + (3, [(0, 0, 0.9), (1, None, None), (2, 1, 0.9)], [False, False, False]), + # easiest case + (2, [(0, 1, 0.9), (1, 0, 0.9)], [True, True]), + # central one is not moved, but have crossing edges + (3, [(0, 2, 0.9), (1, 1, 0.9), (2, 0, 0.9)], [True, True, True]), + # the central one deleted, so not shifted, no crossing edges + (3, [(0, 1, 0.9), (1, None, None), (2, 0, 0.9)], [True, False, True]), + ( + 4, + [(0, 0, 0.9), (1, 3, 0.9), (1, 4, 0.9), (1, 5, 0.9), (2, 2, 0.9), (2, 0, 0.9), (3, None, None)], + [False, True, True, False], + ), + ], + ) + def test_detect_crossing_edges( + self, mt_len: int, mt_pe_alignments: List[Tuple[int, int]], true_mt_shifts_mask: List[bool] + ) -> None: + mt_shifts_mask = tagger._detect_crossing_edges( + [str(i) for i in range(mt_len)], [str(i) for i in range(mt_len)], mt_pe_alignments + ) assert mt_shifts_mask == true_mt_shifts_mask class TestTagsFromEdits: - @pytest.mark.parametrize("mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ - (["A", "B"], ["A", "B"], [(0, 0, 0.9), (1, 1, 0.9)], [{Tags.OK}, {Tags.OK}]), - (["A", "B", "C", "D"], ["A", "B", "C", "D"], [(0, 0, 0.9), (1, 1, 0.9), (2, 2, 0.9), (3, 3, 0.9)], [{Tags.OK}, {Tags.OK}, {Tags.OK}, {Tags.OK}]), - ([], [], [], []), - ]) + @pytest.mark.parametrize( + "mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", + [ + (["A", "B"], ["A", "B"], [(0, 0, 0.9), (1, 1, 0.9)], [{Tags.OK}, {Tags.OK}]), + ( + ["A", "B", "C", "D"], + ["A", "B", "C", "D"], + [(0, 0, 0.9), (1, 1, 0.9), (2, 2, 0.9), (3, 3, 0.9)], + [{Tags.OK}, {Tags.OK}, {Tags.OK}, {Tags.OK}], + ), + ([], [], [], []), + ], + ) def 
test_single_error_ok( - self, - mt_tokens: List[str], - pe_tokens: List[str], - mt_pe_alignments: List[Tuple[int, int]], - true_mt_tags: List[Set[StrEnum]], + self, + mt_tokens: List[str], + pe_tokens: List[str], + mt_pe_alignments: List[Tuple[int, int]], + true_mt_tags: List[Set[StrEnum]], ) -> None: predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] assert len(predicted_tags) == len(true_mt_tags) - for predicted_tags, true_tags in zip(predicted_tags, true_mt_tags): - assert predicted_tags == {t.value for t in true_tags} - - @pytest.mark.parametrize("mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ - (["A", "B", "C"], ["A", "X", "Z"], [(0, 0, 0.9), (1, 1, 0.9), (2, 2, 0.9)], [{Tags.OK}, {Tags.BAD_SUBSTITUTION}, {Tags.BAD_SUBSTITUTION}]), - (["A", "B"], ["Z", "X"], [(0, 0, 0.9), (1, 1, 0.9)], [{Tags.BAD_SUBSTITUTION}, {Tags.BAD_SUBSTITUTION}]), - # For 1-n and n-1 cases see contraction and expansion tests - ]) + for pred_tags, true_tags in zip(predicted_tags, true_mt_tags): + assert pred_tags == {t.value for t in true_tags} + + @pytest.mark.parametrize( + "mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", + [ + ( + ["A", "B", "C"], + ["A", "X", "Z"], + [(0, 0, 0.9), (1, 1, 0.9), (2, 2, 0.9)], + [{Tags.OK}, {Tags.BAD_SUBSTITUTION}, {Tags.BAD_SUBSTITUTION}], + ), + (["A", "B"], ["Z", "X"], [(0, 0, 0.9), (1, 1, 0.9)], [{Tags.BAD_SUBSTITUTION}, {Tags.BAD_SUBSTITUTION}]), + # For 1-n and n-1 cases see contraction and expansion tests + ], + ) def test_single_error_substitution( - self, - mt_tokens: List[str], - pe_tokens: List[str], - mt_pe_alignments: List[Tuple[int, int]], - true_mt_tags: List[Set[StrEnum]], + self, + mt_tokens: List[str], + pe_tokens: List[str], + mt_pe_alignments: List[Tuple[int, int]], + true_mt_tags: List[Set[StrEnum]], ) -> None: predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] assert len(predicted_tags) == len(true_mt_tags) - for predicted_tags, true_tags 
in zip(predicted_tags, true_mt_tags): - assert predicted_tags == {t.value for t in true_tags} - - @pytest.mark.parametrize("mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ - (["A", "B"], ["A"], [(0, 0, 0.9), (1, None, None)], [{Tags.OK}, {Tags.BAD_INSERTION}]), - (["A", "B"], ["B"], [(0, None, None), (1, 0, 0.9)], [{Tags.BAD_INSERTION}, {Tags.OK}]), - (["A", "B"], [], [(0, None, None), (1, None, None)], [{Tags.BAD_INSERTION}, {Tags.BAD_INSERTION}]), - # For 1-n and n-1 cases see contraction and expansion tests - ]) + for pred_tags, true_tags in zip(predicted_tags, true_mt_tags): + assert pred_tags == {t.value for t in true_tags} + + @pytest.mark.parametrize( + "mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", + [ + (["A", "B"], ["A"], [(0, 0, 0.9), (1, None, None)], [{Tags.OK}, {Tags.BAD_INSERTION}]), + (["A", "B"], ["B"], [(0, None, None), (1, 0, 0.9)], [{Tags.BAD_INSERTION}, {Tags.OK}]), + (["A", "B"], [], [(0, None, None), (1, None, None)], [{Tags.BAD_INSERTION}, {Tags.BAD_INSERTION}]), + # For 1-n and n-1 cases see contraction and expansion tests + ], + ) def test_single_error_insertion( - self, - mt_tokens: List[str], - pe_tokens: List[str], - mt_pe_alignments: List[Tuple[int, int]], - true_mt_tags: List[Set[StrEnum]], + self, + mt_tokens: List[str], + pe_tokens: List[str], + mt_pe_alignments: List[Tuple[int, int]], + true_mt_tags: List[Set[StrEnum]], ) -> None: predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] assert len(predicted_tags) == len(true_mt_tags) - for predicted_tags, true_tags in zip(predicted_tags, true_mt_tags): - assert predicted_tags == {t.value for t in true_tags} - - @pytest.mark.parametrize("mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ - (["A"], ["A", "X"], [(0, 0, 0.9), (None, 1, None)], [{Tags.OK, Tags.BAD_DELETION_RIGHT}]), - (["A"], ["X", "A"], [(None, 0, None), (0, 1, 0.9)], [{Tags.OK, Tags.BAD_DELETION_LEFT}]), - (["A", "B"], ["A", "X", "B"], [(0, 0, 0.9), (None, 1, 
None), (1, 2, 0.9)], [{Tags.OK, Tags.BAD_DELETION_RIGHT}, {Tags.OK, Tags.BAD_DELETION_LEFT}]), - # Delete multiple tokens, but tag error as deleted one - (["A"], ["A", "X", "Y", "Z"], [(0, 0, 0.9), (None, 1, None), (None, 2, None), (None, 3, None)], [{Tags.OK, Tags.BAD_DELETION_RIGHT}]), - (["A"], ["X", "Y", "Z", "A"], [(None, 0, None), (None, 1, None), (None, 2, None), (0, 3, 0.9)], [{Tags.OK, Tags.BAD_DELETION_LEFT}]), - (["A", "B"], ["A", "X", "Y", "Z", "B"], [(0, 0, 0.9), (None, 1, None), (None, 2, None), (None, 3, None), (1, 4, 0.9)], [{Tags.OK, Tags.BAD_DELETION_RIGHT}, {Tags.OK, Tags.BAD_DELETION_LEFT}]), - # deleted both left and right sides - (["A"], ["X", "A", "Y"], [(None, 0, None), (0, 1, 0.9), (None, 2, None)], [{Tags.OK, Tags.BAD_DELETION_LEFT, Tags.BAD_DELETION_RIGHT}]), - # deleted for empty target - ([], ["X"], [(None, 0, None)], []), - ]) + for pred_tags, true_tags in zip(predicted_tags, true_mt_tags): + assert pred_tags == {t.value for t in true_tags} + + @pytest.mark.parametrize( + "mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", + [ + (["A"], ["A", "X"], [(0, 0, 0.9), (None, 1, None)], [{Tags.OK, Tags.BAD_DELETION_RIGHT}]), + (["A"], ["X", "A"], [(None, 0, None), (0, 1, 0.9)], [{Tags.OK, Tags.BAD_DELETION_LEFT}]), + ( + ["A", "B"], + ["A", "X", "B"], + [(0, 0, 0.9), (None, 1, None), (1, 2, 0.9)], + [{Tags.OK, Tags.BAD_DELETION_RIGHT}, {Tags.OK, Tags.BAD_DELETION_LEFT}], + ), + # Delete multiple tokens, but tag error as deleted one + ( + ["A"], + ["A", "X", "Y", "Z"], + [(0, 0, 0.9), (None, 1, None), (None, 2, None), (None, 3, None)], + [{Tags.OK, Tags.BAD_DELETION_RIGHT}], + ), + ( + ["A"], + ["X", "Y", "Z", "A"], + [(None, 0, None), (None, 1, None), (None, 2, None), (0, 3, 0.9)], + [{Tags.OK, Tags.BAD_DELETION_LEFT}], + ), + ( + ["A", "B"], + ["A", "X", "Y", "Z", "B"], + [(0, 0, 0.9), (None, 1, None), (None, 2, None), (None, 3, None), (1, 4, 0.9)], + [{Tags.OK, Tags.BAD_DELETION_RIGHT}, {Tags.OK, Tags.BAD_DELETION_LEFT}], + ), + # 
deleted both left and right sides + ( + ["A"], + ["X", "A", "Y"], + [(None, 0, None), (0, 1, 0.9), (None, 2, None)], + [{Tags.OK, Tags.BAD_DELETION_LEFT, Tags.BAD_DELETION_RIGHT}], + ), + # deleted for empty target + ([], ["X"], [(None, 0, None)], []), + ], + ) def test_single_error_deletion( - self, - mt_tokens: List[str], - pe_tokens: List[str], - mt_pe_alignments: List[Tuple[int, int]], - true_mt_tags: List[Set[StrEnum]], + self, + mt_tokens: List[str], + pe_tokens: List[str], + mt_pe_alignments: List[Tuple[int, int]], + true_mt_tags: List[Set[StrEnum]], ) -> None: predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] assert len(predicted_tags) == len(true_mt_tags) - for predicted_tags, true_tags in zip(predicted_tags, true_mt_tags): - assert predicted_tags == {t.value for t in true_tags} - - @pytest.mark.parametrize("mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ - # Have same BBB token, so should filter CCC and TTT out as Deletion error and BBB as Ok - (["AAA", "BBB"], ["AAA", "BBB", "CCC", "TTT"], [(0, 0, 0.9), (1, 1, 0.9), (1, 2, 0.1), (1, 3, 0.1)], [{Tags.OK}, {Tags.OK, Tags.BAD_DELETION_RIGHT}]), - (["AAA", "BBB"], ["AAA", "TTT", "BBB", "CCC"], [(0, 0, 0.9), (1, 1, 0.1), (1, 2, 0.9), (1, 3, 0.1)], [{Tags.OK, Tags.BAD_DELETION_RIGHT}, {Tags.OK, Tags.BAD_DELETION_RIGHT, Tags.BAD_DELETION_LEFT}]), - # XXX, TTT and CCC >threshold are same BBB token, so its bad Contradiction - (["AAA", "BBB"], ["AAA", "XXX", "CCC", "TTT"], [(0, 0, 0.9), (1, 1, 0.9), (1, 2, 0.9), (1, 3, 0.9)], [{Tags.OK}, {Tags.BAD_CONTRACTION}]), - # BBX is >threshold, CCC/TTT threshold, so all are Contractions - (["AAA", "BBB"], ["AAA", "BBX", "XBB"], [(0, 0, 0.9), (1, 1, 0.9), (1, 2, 0.9)], [{Tags.OK}, {Tags.BAD_CONTRACTION}]), - # BBX and XBB >threshold while TTT is threshold are same BBB token, so its bad Contradiction + ( + ["AAA", "BBB"], + ["AAA", "XXX", "CCC", "TTT"], + [(0, 0, 0.9), (1, 1, 0.9), (1, 2, 0.9), (1, 3, 0.9)], + [{Tags.OK}, 
{Tags.BAD_CONTRACTION}], + ), + # BBX is >threshold, CCC/TTT threshold, so all are Contractions + ( + ["AAA", "BBB"], + ["AAA", "BBX", "XBB"], + [(0, 0, 0.9), (1, 1, 0.9), (1, 2, 0.9)], + [{Tags.OK}, {Tags.BAD_CONTRACTION}], + ), + # BBX and XBB >threshold while TTT is None: predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] assert len(predicted_tags) == len(true_mt_tags) - for predicted_tags, true_tags in zip(predicted_tags, true_mt_tags): - assert predicted_tags == {t.value for t in true_tags} - - @pytest.mark.parametrize("mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ - # BB token is same, so CCC and TTT are insertions - (["AAA", "BBB", "CCC", "TTT"], ["AAA", "BBB"], [(0, 0, 0.9), (1, 1, 0.9), (2, 1, 0.1), (3, 1, 0.1)], [{Tags.OK}, {Tags.OK}, {Tags.BAD_INSERTION}, {Tags.BAD_INSERTION}]), - (["AAA", "TTT", "BBB", "CCC"], ["AAA", "BBB"], [(0, 0, 0.9), (1, 1, 0.1), (2, 1, 0.9), (3, 1, 0.1)], [{Tags.OK}, {Tags.BAD_INSERTION}, {Tags.OK}, {Tags.BAD_INSERTION}]), - # XXX, TTT and CCC >threshold are same BBB token, so its bad Expansion - (["AAA", "XXX", "CCC", "TTT"], ["AAA", "BBB"], [(0, 0, 0.9), (1, 1, 0.9), (2, 1, 0.9), (3, 1, 0.9)], [{Tags.OK}, {Tags.BAD_EXPANSION}, {Tags.BAD_EXPANSION}, {Tags.BAD_EXPANSION}]), - # BBX is >threshold, CCC/TTT threshold, so all are Expansion - (["AAA", "BBX", "XBB"], ["AAA", "BBB"], [(0, 0, 0.9), (1, 1, 0.9), (2, 1, 0.9)], [{Tags.OK}, {Tags.BAD_EXPANSION}, {Tags.BAD_EXPANSION}]), - # BBX and XBB >threshold while TTT is threshold are same BBB token, so its bad Expansion + ( + ["AAA", "XXX", "CCC", "TTT"], + ["AAA", "BBB"], + [(0, 0, 0.9), (1, 1, 0.9), (2, 1, 0.9), (3, 1, 0.9)], + [{Tags.OK}, {Tags.BAD_EXPANSION}, {Tags.BAD_EXPANSION}, {Tags.BAD_EXPANSION}], + ), + # BBX is >threshold, CCC/TTT threshold, so all are Expansion + ( + ["AAA", "BBX", "XBB"], + ["AAA", "BBB"], + [(0, 0, 0.9), (1, 1, 0.9), (2, 1, 0.9)], + [{Tags.OK}, {Tags.BAD_EXPANSION}, {Tags.BAD_EXPANSION}], + ), + # BBX and 
XBB >threshold while TTT is None: predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] assert len(predicted_tags) == len(true_mt_tags) - for predicted_tags, true_tags in zip(predicted_tags, true_mt_tags): - assert predicted_tags == {t.value for t in true_tags} - - @pytest.mark.parametrize("mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ - # simple case - (["A", "B"], ["B", "A"], [(0, 1, 0.9), (1, 0, 0.9)], [{Tags.OK, Tags.BAD_SHIFTING}, {Tags.OK, Tags.BAD_SHIFTING}]), - # middle intact, but crossing edges, so shifted - (["A", "X", "Y", "B"], ["B", "X", "Y", "A"], [(0, 3, 0.9), (1, 1, 0.9), (2, 2, 0.9), (3, 0, 0.9)], [{Tags.OK, Tags.BAD_SHIFTING}, {Tags.OK, Tags.BAD_SHIFTING}, {Tags.OK, Tags.BAD_SHIFTING}, {Tags.OK, Tags.BAD_SHIFTING}]), - # node inserted, so should not be marked as shifted - (["A", "X", "B"], ["B", "A"], [(0, 1, 0.9), (1, None, None), (2, 0, 0.9)], [{Tags.OK, Tags.BAD_SHIFTING}, {Tags.BAD_INSERTION}, {Tags.OK, Tags.BAD_SHIFTING}]), - # node deleted, nothing to mark as shifted - (["A", "B"], ["B", "X", "A"], [(0, 2, 0.9), (None, 1, None), (1, 0, 0.9)], [{Tags.OK, Tags.BAD_SHIFTING, Tags.BAD_DELETION_RIGHT}, {Tags.OK, Tags.BAD_SHIFTING, Tags.BAD_DELETION_LEFT}]), - ]) + for pred_tags, true_tags in zip(predicted_tags, true_mt_tags): + assert pred_tags == {t.value for t in true_tags} + + @pytest.mark.parametrize( + "mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", + [ + # simple case + ( + ["A", "B"], + ["B", "A"], + [(0, 1, 0.9), (1, 0, 0.9)], + [{Tags.OK, Tags.BAD_SHIFTING}, {Tags.OK, Tags.BAD_SHIFTING}], + ), + # middle intact, but crossing edges, so shifted + ( + ["A", "X", "Y", "B"], + ["B", "X", "Y", "A"], + [(0, 3, 0.9), (1, 1, 0.9), (2, 2, 0.9), (3, 0, 0.9)], + [ + {Tags.OK, Tags.BAD_SHIFTING}, + {Tags.OK, Tags.BAD_SHIFTING}, + {Tags.OK, Tags.BAD_SHIFTING}, + {Tags.OK, Tags.BAD_SHIFTING}, + ], + ), + # node inserted, so should not be marked as shifted + ( + ["A", "X", "B"], + ["B", "A"], + [(0, 
1, 0.9), (1, None, None), (2, 0, 0.9)], + [{Tags.OK, Tags.BAD_SHIFTING}, {Tags.BAD_INSERTION}, {Tags.OK, Tags.BAD_SHIFTING}], + ), + # node deleted, nothing to mark as shifted + ( + ["A", "B"], + ["B", "X", "A"], + [(0, 2, 0.9), (None, 1, None), (1, 0, 0.9)], + [ + {Tags.OK, Tags.BAD_SHIFTING, Tags.BAD_DELETION_RIGHT}, + {Tags.OK, Tags.BAD_SHIFTING, Tags.BAD_DELETION_LEFT}, + ], + ), + ], + ) def test_single_error_shifted( - self, - mt_tokens: List[str], - pe_tokens: List[str], - mt_pe_alignments: List[Tuple[int, int]], - true_mt_tags: List[Set[StrEnum]], + self, + mt_tokens: List[str], + pe_tokens: List[str], + mt_pe_alignments: List[Tuple[int, int]], + true_mt_tags: List[Set[StrEnum]], ) -> None: predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] assert len(predicted_tags) == len(true_mt_tags) - for predicted_tags, true_tags in zip(predicted_tags, true_mt_tags): - assert predicted_tags == {t.value for t in true_tags} + for pred_tags, true_tags in zip(predicted_tags, true_mt_tags): + assert pred_tags == {t.value for t in true_tags} class TestTagsToSource: - @pytest.mark.parametrize("src_tokens, mt_tokens, mt_pe_alignments, mt_tags, true_src_tags", [ - # ok cases - (["A", "B"], ["A", "B"], [(0, 0, 0.9), (1, 1, 0.9)], [{Tags.OK}, {Tags.OK}], [{Tags.OK}, {Tags.OK}]), - (["A", "B", "C", "D"], ["A", "B", "C", "D"], [(0, 0, 0.9), (1, 1, 0.9), (2, 2, 0.9), (3, 3, 0.9)], [{Tags.OK}, {Tags.OK}, {Tags.OK}, {Tags.OK}], [{Tags.OK}, {Tags.OK}, {Tags.OK}, {Tags.OK}]), - ([], [], [], [], []), - # substitution cases - (["A", "B"], ["A", "C"], [(0, 0, 0.9), (1, 1, 0.9)], [{Tags.OK}, {Tags.BAD_SUBSTITUTION}], [{Tags.OK}, {Tags.BAD_SUBSTITUTION}]), - (["A", "B", "C", "D"], ["A", "B", "X", "D"], [(0, 0, 0.9), (1, 1, 0.9), (2, 2, 0.1), (3, 3, 0.9)], [{Tags.OK}, {Tags.OK}, {Tags.BAD_SUBSTITUTION}, {Tags.OK}], [{Tags.OK}, {Tags.OK}, {Tags.BAD_SUBSTITUTION}, {Tags.OK}]), - # multiple tags - (["A", "B"], ["A", "C"], [(0, 0, 0.9), (1, 1, 0.9)], 
[{Tags.OK, Tags.BAD_SHIFTING}, {Tags.BAD_SUBSTITUTION, Tags.BAD_DELETION_RIGHT}], [{Tags.OK, Tags.BAD_SHIFTING}, {Tags.BAD_SUBSTITUTION, Tags.BAD_DELETION_RIGHT}]), - ]) + @pytest.mark.parametrize( + "src_tokens, mt_tokens, mt_pe_alignments, mt_tags, true_src_tags", + [ + # ok cases + (["A", "B"], ["A", "B"], [(0, 0, 0.9), (1, 1, 0.9)], [{Tags.OK}, {Tags.OK}], [{Tags.OK}, {Tags.OK}]), + ( + ["A", "B", "C", "D"], + ["A", "B", "C", "D"], + [(0, 0, 0.9), (1, 1, 0.9), (2, 2, 0.9), (3, 3, 0.9)], + [{Tags.OK}, {Tags.OK}, {Tags.OK}, {Tags.OK}], + [{Tags.OK}, {Tags.OK}, {Tags.OK}, {Tags.OK}], + ), + ([], [], [], [], []), + # substitution cases + ( + ["A", "B"], + ["A", "C"], + [(0, 0, 0.9), (1, 1, 0.9)], + [{Tags.OK}, {Tags.BAD_SUBSTITUTION}], + [{Tags.OK}, {Tags.BAD_SUBSTITUTION}], + ), + ( + ["A", "B", "C", "D"], + ["A", "B", "X", "D"], + [(0, 0, 0.9), (1, 1, 0.9), (2, 2, 0.1), (3, 3, 0.9)], + [{Tags.OK}, {Tags.OK}, {Tags.BAD_SUBSTITUTION}, {Tags.OK}], + [{Tags.OK}, {Tags.OK}, {Tags.BAD_SUBSTITUTION}, {Tags.OK}], + ), + # multiple tags + ( + ["A", "B"], + ["A", "C"], + [(0, 0, 0.9), (1, 1, 0.9)], + [{Tags.OK, Tags.BAD_SHIFTING}, {Tags.BAD_SUBSTITUTION, Tags.BAD_DELETION_RIGHT}], + [{Tags.OK, Tags.BAD_SHIFTING}, {Tags.BAD_SUBSTITUTION, Tags.BAD_DELETION_RIGHT}], + ), + ], + ) def test_one_to_one( - self, - src_tokens: List[str], - mt_tokens: List[str], - mt_pe_alignments: List[Tuple[int, int]], - mt_tags: List[Set[StrEnum]], - true_src_tags: List[Set[StrEnum]], + self, + src_tokens: List[str], + mt_tokens: List[str], + mt_pe_alignments: List[Tuple[int, int]], + mt_tags: List[Set[StrEnum]], + true_src_tags: List[Set[StrEnum]], ) -> None: - predicted_tags = tagger.tags_to_source([src_tokens], [mt_tokens], [mt_pe_alignments], [[{i.value for i in t} for t in mt_tags]])[0] + predicted_tags = tagger.tags_to_source( + [src_tokens], [mt_tokens], [mt_pe_alignments], [[{i.value for i in t} for t in mt_tags]] + )[0] assert len(predicted_tags) == len(true_src_tags) - for 
predicted_tags, true_tags in zip(predicted_tags, true_src_tags): - assert predicted_tags == {t.value for t in true_tags} + for pred_tags, true_tags in zip(predicted_tags, true_src_tags): + assert pred_tags == {t.value for t in true_tags} - @pytest.mark.parametrize("src_tokens, mt_tokens, mt_pe_alignments, mt_tags, true_src_tags", [ - (["A"], ["A", "B"], [(0, 0, 0.9), (None, 1, None)], [{Tags.OK}, {Tags.BAD_SUBSTITUTION}], [{Tags.OK}]), - (["A", "B"], ["A", "B", "C"], [(0, 0, 0.9), (1, 1, 0.9), (None, 2, None)], [{Tags.BAD_SUBSTITUTION}, {Tags.OK}, {Tags.OK}], [{Tags.BAD_SUBSTITUTION}, {Tags.OK}]), - ]) + @pytest.mark.parametrize( + "src_tokens, mt_tokens, mt_pe_alignments, mt_tags, true_src_tags", + [ + (["A"], ["A", "B"], [(0, 0, 0.9), (None, 1, None)], [{Tags.OK}, {Tags.BAD_SUBSTITUTION}], [{Tags.OK}]), + ( + ["A", "B"], + ["A", "B", "C"], + [(0, 0, 0.9), (1, 1, 0.9), (None, 2, None)], + [{Tags.BAD_SUBSTITUTION}, {Tags.OK}, {Tags.OK}], + [{Tags.BAD_SUBSTITUTION}, {Tags.OK}], + ), + ], + ) def test_src_deleted( - self, - src_tokens: List[str], - mt_tokens: List[str], - mt_pe_alignments: List[Tuple[int, int]], - mt_tags: List[Set[StrEnum]], - true_src_tags: List[Set[StrEnum]], + self, + src_tokens: List[str], + mt_tokens: List[str], + mt_pe_alignments: List[Tuple[int, int]], + mt_tags: List[Set[StrEnum]], + true_src_tags: List[Set[StrEnum]], ) -> None: - predicted_tags = tagger.tags_to_source([src_tokens], [mt_tokens], [mt_pe_alignments], [[{i.value for i in t} for t in mt_tags]])[0] + predicted_tags = tagger.tags_to_source( + [src_tokens], [mt_tokens], [mt_pe_alignments], [[{i.value for i in t} for t in mt_tags]] + )[0] assert len(predicted_tags) == len(true_src_tags) - for predicted_tags, true_tags in zip(predicted_tags, true_src_tags): - assert predicted_tags == {t.value for t in true_tags} + for pred_tags, true_tags in zip(predicted_tags, true_src_tags): + assert pred_tags == {t.value for t in true_tags} - @pytest.mark.parametrize("src_tokens, mt_tokens, 
mt_pe_alignments, mt_tags, true_src_tags", [ - (["A", "B", "C"], ["A", "B"], [(0, 0, 0.9), (1, 1, 0.9), (2, None, None)], [{Tags.BAD_SUBSTITUTION}, {Tags.OK}], [{Tags.BAD_SUBSTITUTION}, {Tags.OK}, set()]), - (["A", "B", "C", "D"], ["B"], [(0, None, None), (1, 0, 0.9), (2, None, None), (3, None, None)], [{Tags.BAD_SUBSTITUTION}], [set(), {Tags.BAD_SUBSTITUTION}, set(), set()]), - ]) + @pytest.mark.parametrize( + "src_tokens, mt_tokens, mt_pe_alignments, mt_tags, true_src_tags", + [ + ( + ["A", "B", "C"], + ["A", "B"], + [(0, 0, 0.9), (1, 1, 0.9), (2, None, None)], + [{Tags.BAD_SUBSTITUTION}, {Tags.OK}], + [{Tags.BAD_SUBSTITUTION}, {Tags.OK}, set()], + ), + ( + ["A", "B", "C", "D"], + ["B"], + [(0, None, None), (1, 0, 0.9), (2, None, None), (3, None, None)], + [{Tags.BAD_SUBSTITUTION}], + [set(), {Tags.BAD_SUBSTITUTION}, set(), set()], + ), + ], + ) def test_mt_deleted( - self, - src_tokens: List[str], - mt_tokens: List[str], - mt_pe_alignments: List[Tuple[int, int]], - mt_tags: List[Set[StrEnum]], - true_src_tags: List[Set[StrEnum]], + self, + src_tokens: List[str], + mt_tokens: List[str], + mt_pe_alignments: List[Tuple[int, int]], + mt_tags: List[Set[StrEnum]], + true_src_tags: List[Set[StrEnum]], ) -> None: - predicted_tags = tagger.tags_to_source([src_tokens], [mt_tokens], [mt_pe_alignments], [[{i.value for i in t} for t in mt_tags]])[0] + predicted_tags = tagger.tags_to_source( + [src_tokens], [mt_tokens], [mt_pe_alignments], [[{i.value for i in t} for t in mt_tags]] + )[0] assert len(predicted_tags) == len(true_src_tags) - for predicted_tags, true_tags in zip(predicted_tags, true_src_tags): - assert predicted_tags == {t.value for t in true_tags} + for pred_tags, true_tags in zip(predicted_tags, true_src_tags): + assert pred_tags == {t.value for t in true_tags} From fa7045d1a6bc12fea4c2cdf7fbc64aaa095401da Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Tue, 9 May 2023 13:24:18 +0200 Subject: [PATCH 11/23] chore: gitignore .ruff_cache/ --- .gitignore 
| 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 6bfc3dd..29a2e87 100644 --- a/.gitignore +++ b/.gitignore @@ -66,6 +66,7 @@ coverage.xml *.py,cover .hypothesis/ .pytest_cache/ +.ruff_cache/ # Translations *.mo From 7b44a98b613b263b6123a20c035934d933e2a199 Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Tue, 9 May 2023 14:05:40 +0200 Subject: [PATCH 12/23] refactor: move taggers to separate module --- divemt/cer.py | 4 +- divemt/qe_taggers/__init__.py | 13 + divemt/qe_taggers/base.py | 104 +++++ divemt/{ => qe_taggers}/custom_simalign.py | 0 .../name_tbd_tagger.py} | 412 +----------------- divemt/qe_taggers/wmt22_tagger.py | 313 +++++++++++++ divemt/{ => qe_taggers}/wmt22qe_utils.py | 0 tests/test_qe_taggers_name_tbd_tagger.py | 4 +- 8 files changed, 440 insertions(+), 410 deletions(-) create mode 100644 divemt/qe_taggers/__init__.py create mode 100644 divemt/qe_taggers/base.py rename divemt/{ => qe_taggers}/custom_simalign.py (100%) rename divemt/{qe_taggers.py => qe_taggers/name_tbd_tagger.py} (51%) create mode 100644 divemt/qe_taggers/wmt22_tagger.py rename divemt/{ => qe_taggers}/wmt22qe_utils.py (100%) diff --git a/divemt/cer.py b/divemt/cer.py index 70bf57b..21a74eb 100644 --- a/divemt/cer.py +++ b/divemt/cer.py @@ -15,7 +15,7 @@ import ctypes import itertools -import Levenshtein +import Levenshtein as levenshtein class EditDistance: @@ -86,7 +86,7 @@ def cer(hyp_words, ref_words, ed_wrapper): if len(shifted_chars) == 0: return 1.0 - edit_cost = Levenshtein.distance(shifted_chars, ref_chars) + shift_cost + edit_cost = levenshtein.distance(shifted_chars, ref_chars) + shift_cost return min(1.0, edit_cost / len(shifted_chars)) diff --git a/divemt/qe_taggers/__init__.py b/divemt/qe_taggers/__init__.py new file mode 100644 index 0000000..b5ae9e7 --- /dev/null +++ b/divemt/qe_taggers/__init__.py @@ -0,0 +1,13 @@ +from .base import QETagger, TTag, TTag +from .name_tbd_tagger import NameTBDGeneralTags, NameTBDTagger +from 
.wmt22_tagger import WMT22QETags, WMT22QETagger + +__all__ = [ + "QETagger", + "TTag", + "TTag", + "NameTBDGeneralTags", + "NameTBDTagger", + "WMT22QETags", + "WMT22QETagger", +] diff --git a/divemt/qe_taggers/base.py b/divemt/qe_taggers/base.py new file mode 100644 index 0000000..cd4b095 --- /dev/null +++ b/divemt/qe_taggers/base.py @@ -0,0 +1,104 @@ +from abc import ABC, abstractmethod +from typing import Any, List, Optional, Set, Tuple, Union + +from ..parse_utils import tokenize + +TTag = Union[str, Set[str]] +TAlignment = Union[Tuple[Optional[int], Optional[int]], Tuple[Optional[int], Optional[int], Optional[float]]] + + +class QETagger(ABC): + """An abstract class to produce quality estimation tags from src-mt-pe triplets.""" + + ID = "qe" + + def align_source_mt( + self, + src_tokens: List[List[str]], + mt_tokens: List[List[str]], + **align_source_mt_kwargs: Any, + ) -> List[List[TAlignment]]: + """Align source and machine translation tokens.""" + raise NotImplementedError(f"{self.__class__.__name__} does not implement align_source_mt()") + + def align_source_pe( + self, + src_tokens: List[List[str]], + pe_tokens: List[List[str]], + **align_source_pe_kwargs: Any, + ) -> List[List[TAlignment]]: + """Align source and post-edited tokens.""" + raise NotImplementedError(f"{self.__class__.__name__} does not implement align_source_pe()") + + @abstractmethod + def align_mt_pe( + self, + mt_tokens: List[List[str]], + pe_tokens: List[List[str]], + **align_mt_pe_kwargs: Any, + ) -> List[List[TAlignment]]: + """Align machine translation and post-editing tokens.""" + pass + + @staticmethod + @abstractmethod + def tags_from_edits( + mt_tokens: List[List[str]], + pe_tokens: List[List[str]], + alignments: List[List[TAlignment]], + **mt_tagging_kwargs: Any, + ) -> List[List[TTag]]: + """Produce tags on MT tokens from edits found in the PE tokens.""" + pass + + @staticmethod + @abstractmethod + def tags_to_source( + src_tokens: List[List[str]], + tgt_tokens: List[List[str]], 
+ **src_tagging_kwargs: Any, + ) -> List[List[TTag]]: + """Propagate tags from MT to source.""" + pass + + @staticmethod + def get_tokenized( + sents: List[str], lang: Union[str, List[str]] + ) -> Tuple[List[List[str]], Union[List[str], List[List[str]]]]: + """Tokenize sentences.""" + if isinstance(lang, str): + lang = [lang] * len(sents) + tok: List[List[str]] = [tokenize(sent, curr_lang, keep_tokens=True) for sent, curr_lang in zip(sents, lang)] + assert len(tok) == len(lang) + return tok, lang + + @abstractmethod + def generate_tags( + self, + srcs: List[str], + mts: List[str], + pes: List[str], + src_langs: Union[str, List[str]], + tgt_langs: Union[str, List[str]], + ) -> Tuple[List[TTag], List[TTag]]: + """Generate word-level quality estimation tags from source-mt-pe triplets. + + Args: + srcs (`List[str]`): + List of untokenized source sentences. + mts (`List[str]`): + List of untokenized machine translated sentences. + pes (`List[str]`): + List of untokenized post-edited sentences. + src_langs (`Union[str, List[str]]`): + Either a single language code for all source sentences or a list of language codes + (one per source sentence). + tgt_langs (`Union[str, List[str]]`): + Either a single language code for all target sentences or a list of language codes + (one per machine translation). + + Returns: + `Tuple[List[TTag], List[TTag]]`: A tuple containing the lists of quality tags for all source and the + machine translation sentence, respectively. 
+ """ + pass diff --git a/divemt/custom_simalign.py b/divemt/qe_taggers/custom_simalign.py similarity index 100% rename from divemt/custom_simalign.py rename to divemt/qe_taggers/custom_simalign.py diff --git a/divemt/qe_taggers.py b/divemt/qe_taggers/name_tbd_tagger.py similarity index 51% rename from divemt/qe_taggers.py rename to divemt/qe_taggers/name_tbd_tagger.py index 924a9cb..5e2b9fb 100644 --- a/divemt/qe_taggers.py +++ b/divemt/qe_taggers/name_tbd_tagger.py @@ -1,15 +1,7 @@ -import codecs import logging -import subprocess import sys -from abc import ABC, abstractmethod -from collections import defaultdict from itertools import groupby -from pathlib import Path -from typing import Any, Generator, List, Optional, Set, Tuple, Union -from xml.sax.saxutils import escape - -from simalign import SentenceAligner +from typing import Generator, List, Optional, Set, Tuple, Union if sys.version_info < (3, 11): from strenum import StrEnum @@ -17,406 +9,14 @@ from enum import StrEnum from tqdm import tqdm +from ..cache_utils import CacheDecorator from .custom_simalign import SentenceAligner as CustomSentenceAligner -from .parse_utils import clear_nlp_cache, tokenize -from .wmt22qe_utils import align_sentence_tercom, parse_tercom_xml_file +from ..parse_utils import clear_nlp_cache +from .base import QETagger, TAlignment, TTag logger = logging.getLogger(__name__) -TTag = Union[str, Set[str]] -TAlignment = Union[Tuple[Optional[int], Optional[int]], Tuple[Optional[int], Optional[int], Optional[float]]] - - -class QETagger(ABC): - """An abstract class to produce quality estimation tags from src-mt-pe triplets.""" - - ID = "qe" - - def align_source_mt( - self, - src_tokens: List[List[str]], - mt_tokens: List[List[str]], - **align_source_mt_kwargs: Any, - ) -> List[List[TAlignment]]: - """Align source and machine translation tokens.""" - raise NotImplementedError(f"{self.__class__.__name__} does not implement align_source_mt()") - - def align_source_pe( - self, - src_tokens: 
List[List[str]], - pe_tokens: List[List[str]], - **align_source_pe_kwargs: Any, - ) -> List[List[TAlignment]]: - """Align source and post-edited tokens.""" - raise NotImplementedError(f"{self.__class__.__name__} does not implement align_source_pe()") - - @abstractmethod - def align_mt_pe( - self, - mt_tokens: List[List[str]], - pe_tokens: List[List[str]], - **align_mt_pe_kwargs: Any, - ) -> List[List[TAlignment]]: - """Align machine translation and post-editing tokens.""" - pass - - @staticmethod - @abstractmethod - def tags_from_edits( - mt_tokens: List[List[str]], - pe_tokens: List[List[str]], - alignments: List[List[TAlignment]], - **mt_tagging_kwargs: Any, - ) -> List[List[TTag]]: - """Produce tags on MT tokens from edits found in the PE tokens.""" - pass - - @staticmethod - @abstractmethod - def tags_to_source( - src_tokens: List[List[str]], - tgt_tokens: List[List[str]], - **src_tagging_kwargs: Any, - ) -> List[List[TTag]]: - """Propagate tags from MT to source.""" - pass - - @staticmethod - def get_tokenized( - sents: List[str], lang: Union[str, List[str]] - ) -> Tuple[List[List[str]], Union[List[str], List[List[str]]]]: - """Tokenize sentences.""" - if isinstance(lang, str): - lang = [lang] * len(sents) - tok: List[List[str]] = [tokenize(sent, curr_lang, keep_tokens=True) for sent, curr_lang in zip(sents, lang)] - assert len(tok) == len(lang) - return tok, lang - - @abstractmethod - def generate_tags( - self, - srcs: List[str], - mts: List[str], - pes: List[str], - src_langs: Union[str, List[str]], - tgt_langs: Union[str, List[str]], - ) -> Tuple[List[TTag], List[TTag]]: - """Generate word-level quality estimation tags from source-mt-pe triplets. - - Args: - srcs (`List[str]`): - List of untokenized source sentences. - mts (`List[str]`): - List of untokenized machine translated sentences. - pes (`List[str]`): - List of untokenized post-edited sentences. 
- src_langs (`Union[str, List[str]]`): - Either a single language code for all source sentences or a list of language codes - (one per source sentence). - tgt_langs (`Union[str, List[str]]`): - Either a single language code for all target sentences or a list of language codes - (one per machine translation). - - Returns: - `Tuple[List[TTag], List[TTag]]`: A tuple containing the lists of quality tags for all source and the - machine translation sentence, respectively. - """ - pass - - -class FluencyRule(StrEnum): - """Fluency rules used in the WMT22 QE task.""" - - NORMAL = "normal" - MISSING = "missing-only" - IGNORE_SHF = "ignore-shift-set" - - -class OmissionRule(StrEnum): - """Omission rules used in the WMT22 QE task.""" - - NONE = "none" - LEFT = "left" - RIGHT = "right" - - -class WMT22QETags(StrEnum): - """WMT22 QE tags""" - - OK = "OK" - BAD = "BAD" - - -class WMT22QETagger(QETagger): - """Mimics the word-level QE tagging process used for WMT22.""" - - ID = "wmt22_qe" - - def __init__( - self, - aligner: Optional[SentenceAligner] = None, - tmp_dir: Optional[str] = None, - tercom_out: Optional[str] = None, - tercom_path: Optional[str] = None, - ): - """Initialize the WMT22QETagger.""" - self.aligner = aligner if aligner else SentenceAligner(model="xlmr", token_type="bpe", matching_methods="mai") - self.tmp_dir = Path(tmp_dir) if tmp_dir is not None else Path("tmp") - self.tmp_dir.mkdir(parents=True, exist_ok=True) - self.tercom_out = Path(tercom_out) if tercom_out is not None else self.tmp_dir / "tercom" - self.tercom_path = tercom_path if tercom_path is not None else "scripts/tercom.7.25.jar" - - def align_source_pe( - self, - src_tokens: List[List[str]], - pe_tokens: List[List[str]], - pe_langs: List[str], - ) -> List[List[TAlignment]]: - return [ - self.aligner.get_word_aligns(src_tok, mt_tok)["itermax" if mt_lang not in ["de", "cs"] else "inter"] - for src_tok, mt_tok, mt_lang in tqdm( - zip(src_tokens, pe_tokens, pe_langs), - total=len(src_tokens), - 
desc="Aligning src-pe", - ) - ] - - def align_mt_pe( - self, - mt_tokens: List[List[str]], - pe_tokens: List[List[str]], - ) -> List[List[TAlignment]]: - ref_fname = self.tmp_dir / "ref.txt" - hyp_fname = self.tmp_dir / "hyp.txt" - # Adapted from https://github.com/deep-spin/qe-corpus-builder/corpus_generation/tools/format_tercom.py - with codecs.open(str(ref_fname), "w", encoding="utf-8") as rf: - with codecs.open(str(hyp_fname), "w", encoding="utf-8") as hf: - for idx, (ref, hyp) in enumerate(zip(mt_tokens, pe_tokens)): - _ref = " ".join(ref).rstrip() - _ref = escape(_ref).replace('"', '\\"') - rf.write(f"{_ref}\t({idx})\n") - _hyp = " ".join(hyp).rstrip() - _hyp = escape(_hyp).replace('"', '\\"') - hf.write(f"{_hyp}\t({idx})\n") - ps = [ - "java", - "-jar", - self.tercom_path, - "-r", - ref_fname, - "-h", - hyp_fname, - "-n", - self.tercom_out, - "-d", - "0", - ] - try: - _ = subprocess.run(ps, capture_output=True, check=True) - except subprocess.CalledProcessError as e: - logger.warning( - f"Error while running tercom: {e.stderr}.\nPlease make sure you have java installed and that the .jar " - f"file is found at {self.tercom_path}" - ) - # Parse tercom HTML - pe_parse_tokens, mt_parse_tokens, edits = parse_tercom_xml_file(f"{self.tercom_out}.xml") - - # Sanity check: Original and tercom files match in number of tokens - # Note that we will not use the tokenized tercom outputs only the alignments - for mt_par_toks, pe_par_toks, mt_toks, pe_toks in zip(mt_parse_tokens, pe_parse_tokens, mt_tokens, pe_tokens): - # Inserted tokens correspond to empty strings in the XLM tercom output - assert len([t for t in mt_par_toks if t]) == len(mt_toks), f"{mt_par_toks} != {mt_toks}" - assert len([t for t in pe_par_toks if t]) == len(pe_toks), f"{pe_par_toks} != {pe_toks}" - - return [align_sentence_tercom(mt, pe, edit) for mt, pe, edit in zip(mt_tokens, pe_tokens, edits)] - - @staticmethod - def tags_from_edits( - mt_tokens: List[List[str]], - pe_tokens: List[List[str]], - 
alignments: List[List[TAlignment]], - use_gaps: bool = False, - omissions: str = OmissionRule.RIGHT.value, - ) -> List[List[TTag]]: - """Produce tags on MT tokens from edits found in the PE tokens.""" - if use_gaps: - omissions = OmissionRule.NONE.value - - mt_tags = [] - for mt_tok, pe_tok, align in tqdm( - zip(mt_tokens, pe_tokens, alignments), - desc="Tagging MT", - total=len(mt_tokens), - ): - sent_tags = [] - sent_deletion_indices = [] - mt_position = 0 - - # Loop over alignments. This has the length of the edit-distance aligned sequences. - for mt_idx, pe_idx in align: - if mt_idx is None: - # Deleted word error (need to store for later) - if omissions == OmissionRule.LEFT or omissions == OmissionRule.NONE: - sent_deletion_indices.append(mt_position - 1) - else: - sent_deletion_indices.append(mt_position) - elif pe_idx is None: - # Insertion error - sent_tags.append(WMT22QETags.BAD.value) - mt_position += 1 - elif mt_tok[mt_idx] != pe_tok[pe_idx]: - # Substitution error - sent_tags.append(WMT22QETags.BAD.value) - mt_position += 1 - else: - # OK - sent_tags.append(WMT22QETags.OK.value) - mt_position += 1 - - # Insert deletion errors as gaps - word_and_gaps_tags = [] - if use_gaps: - # Add starting OK/BAD - if -1 in sent_deletion_indices: - word_and_gaps_tags.append(WMT22QETags.BAD.value) - else: - word_and_gaps_tags.append(WMT22QETags.OK.value) - # Add rest of OK/BADs - for index, tag in enumerate(sent_tags): - if index in sent_deletion_indices: - word_and_gaps_tags.extend([tag, WMT22QETags.BAD.value]) - else: - word_and_gaps_tags.extend([tag, WMT22QETags.OK.value]) - mt_tags.append(word_and_gaps_tags) - else: - if omissions == OmissionRule.NONE: - mt_tags.append(sent_tags) - elif omissions == OmissionRule.RIGHT: - for index, tag in enumerate(sent_tags): - if index in sent_deletion_indices: - word_and_gaps_tags.append(WMT22QETags.BAD.value) - else: - word_and_gaps_tags.append(tag) - if len(sent_tags) in sent_deletion_indices: - 
word_and_gaps_tags.append(WMT22QETags.BAD.value) - else: - word_and_gaps_tags.append(WMT22QETags.OK.value) - elif omissions == OmissionRule.LEFT: - if -1 in sent_deletion_indices: - word_and_gaps_tags.append(WMT22QETags.BAD.value) - else: - word_and_gaps_tags.append(WMT22QETags.OK.value) - for index, tag in enumerate(sent_tags): - if index in sent_deletion_indices: - word_and_gaps_tags.append(WMT22QETags.BAD.value) - else: - word_and_gaps_tags.append(tag) - mt_tags.append(word_and_gaps_tags) - - # Basic sanity checks - if use_gaps: - assert all(len(aa) * 2 + 1 == len(bb) for aa, bb in zip(mt_tokens, mt_tags)), "MT tag creation failed" - else: - if omissions == OmissionRule.NONE: # noqa: PLR5501 - assert all(len(aa) == len(bb) for aa, bb in zip(mt_tokens, mt_tags)), "MT tag creation failed" - else: - assert all(len(aa) + 1 == len(bb) for aa, bb in zip(mt_tokens, mt_tags)), "MT tag creation failed" - return mt_tags - - @staticmethod - def tags_to_source( - src_tokens: List[List[str]], - pe_tokens: List[List[str]], - mt_tokens: List[List[str]], - src_pe_alignments: List[List[TAlignment]], - mt_pe_alignments: List[List[TAlignment]], - fluency_rule: str = FluencyRule.NORMAL.value, - ) -> List[List[TTag]]: - """Propagate tags from MT to source.""" - # Reorganize source-target alignments as a dict - pe2source = [] - for sent in src_pe_alignments: - pe2source_sent = defaultdict(list) - for src_idx, pe_idx in sent: - pe2source_sent[pe_idx].append(src_idx) - pe2source.append(pe2source_sent) - - src_tags = [] - for ( - src_sent_tok, - mt_sent_tok, - pe_sent_tok, - sent_pe2src, - sent_mt_pe_aligns, - ) in tqdm( - zip(src_tokens, mt_tokens, pe_tokens, pe2source, mt_pe_alignments), - desc="Tagging source", - total=len(src_tokens), - ): - source_sentence_bad_indices = set() - mt_position = 0 - for mt_idx, pe_idx in sent_mt_pe_aligns: - if mt_idx is None or ( - mt_idx is not None and pe_idx is not None and mt_sent_tok[mt_idx] != pe_sent_tok[pe_idx] - ): - if fluency_rule == 
FluencyRule.NORMAL: - source_positions = sent_pe2src[pe_idx] - source_sentence_bad_indices |= set(source_positions) - elif fluency_rule == FluencyRule.IGNORE_SHF: - if pe_sent_tok[pe_idx] not in mt_sent_tok: - source_positions = sent_pe2src[pe_idx] - source_sentence_bad_indices |= set(source_positions) - elif fluency_rule == FluencyRule.MISSING: - if mt_idx is None: - source_positions = sent_pe2src[pe_idx] - source_sentence_bad_indices |= set(source_positions) - else: - raise Exception(f"Unknown fluency rule {fluency_rule}") - else: - mt_position += 1 - source_sentence_bad_tags = [WMT22QETags.OK.value] * len(src_sent_tok) - for index in list(source_sentence_bad_indices): - source_sentence_bad_tags[index] = WMT22QETags.BAD.value - src_tags.append(source_sentence_bad_tags) - - # Basic sanity checks - assert all(len(aa) == len(bb) for aa, bb in zip(src_tokens, src_tags)), "SRC tag creation failed" - return src_tags - - def generate_tags( - self, - srcs: List[str], - mts: List[str], - pes: List[str], - src_langs: Union[str, List[str]], - tgt_langs: Union[str, List[str]], - use_gaps: bool = False, - omissions: str = OmissionRule.RIGHT.value, - fluency_rule: str = FluencyRule.NORMAL.value, - ) -> Tuple[List[List[TTag]], List[List[TTag]]]: - src_tokens, src_langs = self.get_tokenized(srcs, src_langs) - mt_tokens, tgt_langs = self.get_tokenized(mts, tgt_langs) - pe_tokens, _ = self.get_tokenized(pes, tgt_langs) - - src_pe_alignments = self.align_source_pe(src_tokens, pe_tokens, tgt_langs) - mt_pe_alignments = self.align_mt_pe(mt_tokens, pe_tokens) - - mt_tags = self.tags_from_edits(mt_tokens, pe_tokens, mt_pe_alignments, use_gaps, omissions) - src_tags = self.tags_to_source( - src_tokens, - pe_tokens, - mt_tokens, - src_pe_alignments, - mt_pe_alignments, - fluency_rule, - ) - - clear_nlp_cache() - - return src_tags, mt_tags - - class NameTBDGeneralTags(StrEnum): """Error types tags for NameTBD.""" @@ -468,7 +68,7 @@ def _fill_deleted_inserted_tokens( return new_alignments 
- # @CacheDecorator() + @CacheDecorator() def align_source_mt( self, src_tokens: List[List[str]], @@ -481,7 +81,7 @@ def align_source_mt( for src_tok, mt_tok in tqdm(zip(src_tokens, mt_tokens), total=len(src_tokens), desc="Aligning src-mt") ] - # @CacheDecorator() + @CacheDecorator() def align_mt_pe( self, mt_tokens: List[List[str]], diff --git a/divemt/qe_taggers/wmt22_tagger.py b/divemt/qe_taggers/wmt22_tagger.py new file mode 100644 index 0000000..1455e5a --- /dev/null +++ b/divemt/qe_taggers/wmt22_tagger.py @@ -0,0 +1,313 @@ +import codecs +import logging +import subprocess +import sys +from collections import defaultdict +from pathlib import Path +from typing import List, Optional, Tuple, Union +from xml.sax.saxutils import escape + +if sys.version_info < (3, 11): + from strenum import StrEnum +else: + from enum import StrEnum +from simalign import SentenceAligner +from tqdm import tqdm + +from ..parse_utils import clear_nlp_cache +from .base import QETagger, TAlignment, TTag +from .wmt22qe_utils import align_sentence_tercom, parse_tercom_xml_file + +logger = logging.getLogger(__name__) + + +class FluencyRule(StrEnum): + """Fluency rules used in the WMT22 QE task.""" + + NORMAL = "normal" + MISSING = "missing-only" + IGNORE_SHF = "ignore-shift-set" + + +class OmissionRule(StrEnum): + """Omission rules used in the WMT22 QE task.""" + + NONE = "none" + LEFT = "left" + RIGHT = "right" + + +class WMT22QETags(StrEnum): + """WMT22 QE tags""" + + OK = "OK" + BAD = "BAD" + + +class WMT22QETagger(QETagger): + """Mimics the word-level QE tagging process used for WMT22.""" + + ID = "wmt22_qe" + + def __init__( + self, + aligner: Optional[SentenceAligner] = None, + tmp_dir: Optional[str] = None, + tercom_out: Optional[str] = None, + tercom_path: Optional[str] = None, + ): + """Initialize the WMT22QETagger.""" + self.aligner = aligner if aligner else SentenceAligner(model="xlmr", token_type="bpe", matching_methods="mai") + self.tmp_dir = Path(tmp_dir) if tmp_dir is not 
None else Path("tmp") + self.tmp_dir.mkdir(parents=True, exist_ok=True) + self.tercom_out = Path(tercom_out) if tercom_out is not None else self.tmp_dir / "tercom" + self.tercom_path = tercom_path if tercom_path is not None else "scripts/tercom.7.25.jar" + + def align_source_pe( + self, + src_tokens: List[List[str]], + pe_tokens: List[List[str]], + pe_langs: List[str], + ) -> List[List[TAlignment]]: + return [ + self.aligner.get_word_aligns(src_tok, mt_tok)["itermax" if mt_lang not in ["de", "cs"] else "inter"] + for src_tok, mt_tok, mt_lang in tqdm( + zip(src_tokens, pe_tokens, pe_langs), + total=len(src_tokens), + desc="Aligning src-pe", + ) + ] + + def align_mt_pe( + self, + mt_tokens: List[List[str]], + pe_tokens: List[List[str]], + ) -> List[List[TAlignment]]: + ref_fname = self.tmp_dir / "ref.txt" + hyp_fname = self.tmp_dir / "hyp.txt" + # Adapted from https://github.com/deep-spin/qe-corpus-builder/corpus_generation/tools/format_tercom.py + with codecs.open(str(ref_fname), "w", encoding="utf-8") as rf: + with codecs.open(str(hyp_fname), "w", encoding="utf-8") as hf: + for idx, (ref, hyp) in enumerate(zip(mt_tokens, pe_tokens)): + _ref = " ".join(ref).rstrip() + _ref = escape(_ref).replace('"', '\\"') + rf.write(f"{_ref}\t({idx})\n") + _hyp = " ".join(hyp).rstrip() + _hyp = escape(_hyp).replace('"', '\\"') + hf.write(f"{_hyp}\t({idx})\n") + ps = [ + "java", + "-jar", + self.tercom_path, + "-r", + ref_fname, + "-h", + hyp_fname, + "-n", + self.tercom_out, + "-d", + "0", + ] + try: + _ = subprocess.run(ps, capture_output=True, check=True) + except subprocess.CalledProcessError as e: + logger.warning( + f"Error while running tercom: {e.stderr}.\nPlease make sure you have java installed and that the .jar " + f"file is found at {self.tercom_path}" + ) + # Parse tercom HTML + pe_parse_tokens, mt_parse_tokens, edits = parse_tercom_xml_file(f"{self.tercom_out}.xml") + + # Sanity check: Original and tercom files match in number of tokens + # Note that we will not use 
the tokenized tercom outputs only the alignments + for mt_par_toks, pe_par_toks, mt_toks, pe_toks in zip(mt_parse_tokens, pe_parse_tokens, mt_tokens, pe_tokens): + # Inserted tokens correspond to empty strings in the XLM tercom output + assert len([t for t in mt_par_toks if t]) == len(mt_toks), f"{mt_par_toks} != {mt_toks}" + assert len([t for t in pe_par_toks if t]) == len(pe_toks), f"{pe_par_toks} != {pe_toks}" + + return [align_sentence_tercom(mt, pe, edit) for mt, pe, edit in zip(mt_tokens, pe_tokens, edits)] + + @staticmethod + def tags_from_edits( + mt_tokens: List[List[str]], + pe_tokens: List[List[str]], + alignments: List[List[TAlignment]], + use_gaps: bool = False, + omissions: str = OmissionRule.RIGHT.value, + ) -> List[List[TTag]]: + """Produce tags on MT tokens from edits found in the PE tokens.""" + if use_gaps: + omissions = OmissionRule.NONE.value + + mt_tags = [] + for mt_tok, pe_tok, align in tqdm( + zip(mt_tokens, pe_tokens, alignments), + desc="Tagging MT", + total=len(mt_tokens), + ): + sent_tags = [] + sent_deletion_indices = [] + mt_position = 0 + + # Loop over alignments. This has the length of the edit-distance aligned sequences. 
+ for mt_idx, pe_idx in align: + if mt_idx is None: + # Deleted word error (need to store for later) + if omissions == OmissionRule.LEFT or omissions == OmissionRule.NONE: + sent_deletion_indices.append(mt_position - 1) + else: + sent_deletion_indices.append(mt_position) + elif pe_idx is None: + # Insertion error + sent_tags.append(WMT22QETags.BAD.value) + mt_position += 1 + elif mt_tok[mt_idx] != pe_tok[pe_idx]: + # Substitution error + sent_tags.append(WMT22QETags.BAD.value) + mt_position += 1 + else: + # OK + sent_tags.append(WMT22QETags.OK.value) + mt_position += 1 + + # Insert deletion errors as gaps + word_and_gaps_tags = [] + if use_gaps: + # Add starting OK/BAD + if -1 in sent_deletion_indices: + word_and_gaps_tags.append(WMT22QETags.BAD.value) + else: + word_and_gaps_tags.append(WMT22QETags.OK.value) + # Add rest of OK/BADs + for index, tag in enumerate(sent_tags): + if index in sent_deletion_indices: + word_and_gaps_tags.extend([tag, WMT22QETags.BAD.value]) + else: + word_and_gaps_tags.extend([tag, WMT22QETags.OK.value]) + mt_tags.append(word_and_gaps_tags) + else: + if omissions == OmissionRule.NONE: + mt_tags.append(sent_tags) + elif omissions == OmissionRule.RIGHT: + for index, tag in enumerate(sent_tags): + if index in sent_deletion_indices: + word_and_gaps_tags.append(WMT22QETags.BAD.value) + else: + word_and_gaps_tags.append(tag) + if len(sent_tags) in sent_deletion_indices: + word_and_gaps_tags.append(WMT22QETags.BAD.value) + else: + word_and_gaps_tags.append(WMT22QETags.OK.value) + elif omissions == OmissionRule.LEFT: + if -1 in sent_deletion_indices: + word_and_gaps_tags.append(WMT22QETags.BAD.value) + else: + word_and_gaps_tags.append(WMT22QETags.OK.value) + for index, tag in enumerate(sent_tags): + if index in sent_deletion_indices: + word_and_gaps_tags.append(WMT22QETags.BAD.value) + else: + word_and_gaps_tags.append(tag) + mt_tags.append(word_and_gaps_tags) + + # Basic sanity checks + if use_gaps: + assert all(len(aa) * 2 + 1 == len(bb) for 
aa, bb in zip(mt_tokens, mt_tags)), "MT tag creation failed" + else: + if omissions == OmissionRule.NONE: # noqa: PLR5501 + assert all(len(aa) == len(bb) for aa, bb in zip(mt_tokens, mt_tags)), "MT tag creation failed" + else: + assert all(len(aa) + 1 == len(bb) for aa, bb in zip(mt_tokens, mt_tags)), "MT tag creation failed" + return mt_tags + + @staticmethod + def tags_to_source( + src_tokens: List[List[str]], + pe_tokens: List[List[str]], + mt_tokens: List[List[str]], + src_pe_alignments: List[List[TAlignment]], + mt_pe_alignments: List[List[TAlignment]], + fluency_rule: str = FluencyRule.NORMAL.value, + ) -> List[List[TTag]]: + """Propagate tags from MT to source.""" + # Reorganize source-target alignments as a dict + pe2source = [] + for sent in src_pe_alignments: + pe2source_sent = defaultdict(list) + for src_idx, pe_idx in sent: + pe2source_sent[pe_idx].append(src_idx) + pe2source.append(pe2source_sent) + + src_tags = [] + for ( + src_sent_tok, + mt_sent_tok, + pe_sent_tok, + sent_pe2src, + sent_mt_pe_aligns, + ) in tqdm( + zip(src_tokens, mt_tokens, pe_tokens, pe2source, mt_pe_alignments), + desc="Tagging source", + total=len(src_tokens), + ): + source_sentence_bad_indices = set() + mt_position = 0 + for mt_idx, pe_idx in sent_mt_pe_aligns: + if mt_idx is None or ( + mt_idx is not None and pe_idx is not None and mt_sent_tok[mt_idx] != pe_sent_tok[pe_idx] + ): + if fluency_rule == FluencyRule.NORMAL: + source_positions = sent_pe2src[pe_idx] + source_sentence_bad_indices |= set(source_positions) + elif fluency_rule == FluencyRule.IGNORE_SHF: + if pe_sent_tok[pe_idx] not in mt_sent_tok: + source_positions = sent_pe2src[pe_idx] + source_sentence_bad_indices |= set(source_positions) + elif fluency_rule == FluencyRule.MISSING: + if mt_idx is None: + source_positions = sent_pe2src[pe_idx] + source_sentence_bad_indices |= set(source_positions) + else: + raise Exception(f"Unknown fluency rule {fluency_rule}") + else: + mt_position += 1 + source_sentence_bad_tags = 
[WMT22QETags.OK.value] * len(src_sent_tok) + for index in list(source_sentence_bad_indices): + source_sentence_bad_tags[index] = WMT22QETags.BAD.value + src_tags.append(source_sentence_bad_tags) + + # Basic sanity checks + assert all(len(aa) == len(bb) for aa, bb in zip(src_tokens, src_tags)), "SRC tag creation failed" + return src_tags + + def generate_tags( + self, + srcs: List[str], + mts: List[str], + pes: List[str], + src_langs: Union[str, List[str]], + tgt_langs: Union[str, List[str]], + use_gaps: bool = False, + omissions: str = OmissionRule.RIGHT.value, + fluency_rule: str = FluencyRule.NORMAL.value, + ) -> Tuple[List[List[TTag]], List[List[TTag]]]: + src_tokens, src_langs = self.get_tokenized(srcs, src_langs) + mt_tokens, tgt_langs = self.get_tokenized(mts, tgt_langs) + pe_tokens, _ = self.get_tokenized(pes, tgt_langs) + + src_pe_alignments = self.align_source_pe(src_tokens, pe_tokens, tgt_langs) + mt_pe_alignments = self.align_mt_pe(mt_tokens, pe_tokens) + + mt_tags = self.tags_from_edits(mt_tokens, pe_tokens, mt_pe_alignments, use_gaps, omissions) + src_tags = self.tags_to_source( + src_tokens, + pe_tokens, + mt_tokens, + src_pe_alignments, + mt_pe_alignments, + fluency_rule, + ) + + clear_nlp_cache() + + return src_tags, mt_tags diff --git a/divemt/wmt22qe_utils.py b/divemt/qe_taggers/wmt22qe_utils.py similarity index 100% rename from divemt/wmt22qe_utils.py rename to divemt/qe_taggers/wmt22qe_utils.py diff --git a/tests/test_qe_taggers_name_tbd_tagger.py b/tests/test_qe_taggers_name_tbd_tagger.py index 0350f5f..0b13ab4 100644 --- a/tests/test_qe_taggers_name_tbd_tagger.py +++ b/tests/test_qe_taggers_name_tbd_tagger.py @@ -8,8 +8,8 @@ else: from enum import StrEnum -from divemt.qe_taggers import NameTBDGeneralTags as Tags -from divemt.qe_taggers import NameTBDTagger +from divemt.qe_taggers.name_tbd_tagger import NameTBDGeneralTags as Tags +from divemt.qe_taggers.name_tbd_tagger import NameTBDTagger tagger = NameTBDTagger() From 
c13798e4d4ce7051a6c37cd6c0b7a72c06579324 Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Tue, 9 May 2023 14:35:39 +0200 Subject: [PATCH 13/23] fix: remove debug print in cache --- divemt/cache_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/divemt/cache_utils.py b/divemt/cache_utils.py index c9ed2b4..c1af5be 100644 --- a/divemt/cache_utils.py +++ b/divemt/cache_utils.py @@ -117,7 +117,6 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: with open(cache_file, "rb") as f: return pickle.load(f) else: - print(len(args), len(kwargs.items())) result = function(*args, **kwargs) print(f"CREATE CACHE: {cache_file}") cache_file.parent.mkdir(parents=True, exist_ok=True) From 4233114d36ca3f5a07b2114a43ff926a844215a3 Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Thu, 11 May 2023 10:49:45 +0200 Subject: [PATCH 14/23] style: add cache for wmt22 and fix style for tests --- divemt/qe_taggers/name_tbd_tagger.py | 4 +-- divemt/qe_taggers/wmt22_tagger.py | 3 ++ tests/test_qe_taggers_name_tbd_tagger.py | 44 ++++++++++++++++++++---- 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/divemt/qe_taggers/name_tbd_tagger.py b/divemt/qe_taggers/name_tbd_tagger.py index 5e2b9fb..a6cce6f 100644 --- a/divemt/qe_taggers/name_tbd_tagger.py +++ b/divemt/qe_taggers/name_tbd_tagger.py @@ -95,7 +95,7 @@ def align_mt_pe( @staticmethod def _group_by_node( - alignments: List[Tuple[Optional[int], Optional[int]]], by_start_node: bool = True, sort: bool = False + alignments: List[TAlignment], by_start_node: bool = True, sort: bool = False ) -> Generator[Tuple[int, List[int], List[float]], None, None]: """Yield a node id and a list of connected nodes.""" _by_index = 0 if by_start_node else 1 @@ -109,7 +109,7 @@ def _group_by_node( @staticmethod def _detect_crossing_edges( - mt_tokens: List[str], pe_tokens: List[str], alignments: List[Tuple[Optional[int], Optional[int], float]] + mt_tokens: List[str], pe_tokens: List[str], alignments: List[TAlignment] ) -> 
List[bool]: """Detect crossing edges in the alignments. Return mask list of nodes that cross some other node.""" # TODO: optimize from n^2 to n as 2 pointers diff --git a/divemt/qe_taggers/wmt22_tagger.py b/divemt/qe_taggers/wmt22_tagger.py index 1455e5a..3735ec7 100644 --- a/divemt/qe_taggers/wmt22_tagger.py +++ b/divemt/qe_taggers/wmt22_tagger.py @@ -17,6 +17,7 @@ from ..parse_utils import clear_nlp_cache from .base import QETagger, TAlignment, TTag from .wmt22qe_utils import align_sentence_tercom, parse_tercom_xml_file +from ..cache_utils import CacheDecorator logger = logging.getLogger(__name__) @@ -63,6 +64,7 @@ def __init__( self.tercom_out = Path(tercom_out) if tercom_out is not None else self.tmp_dir / "tercom" self.tercom_path = tercom_path if tercom_path is not None else "scripts/tercom.7.25.jar" + @CacheDecorator() def align_source_pe( self, src_tokens: List[List[str]], @@ -78,6 +80,7 @@ def align_source_pe( ) ] + @CacheDecorator() def align_mt_pe( self, mt_tokens: List[List[str]], diff --git a/tests/test_qe_taggers_name_tbd_tagger.py b/tests/test_qe_taggers_name_tbd_tagger.py index 0b13ab4..c2149a1 100644 --- a/tests/test_qe_taggers_name_tbd_tagger.py +++ b/tests/test_qe_taggers_name_tbd_tagger.py @@ -48,7 +48,12 @@ class TestTagsFromEdits: @pytest.mark.parametrize( "mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ - (["A", "B"], ["A", "B"], [(0, 0, 0.9), (1, 1, 0.9)], [{Tags.OK}, {Tags.OK}]), + ( + ["A", "B"], + ["A", "B"], + [(0, 0, 0.9), (1, 1, 0.9)], + [{Tags.OK}, {Tags.OK}], + ), ( ["A", "B", "C", "D"], ["A", "B", "C", "D"], @@ -79,7 +84,12 @@ def test_single_error_ok( [(0, 0, 0.9), (1, 1, 0.9), (2, 2, 0.9)], [{Tags.OK}, {Tags.BAD_SUBSTITUTION}, {Tags.BAD_SUBSTITUTION}], ), - (["A", "B"], ["Z", "X"], [(0, 0, 0.9), (1, 1, 0.9)], [{Tags.BAD_SUBSTITUTION}, {Tags.BAD_SUBSTITUTION}]), + ( + ["A", "B"], + ["Z", "X"], + [(0, 0, 0.9), (1, 1, 0.9)], + [{Tags.BAD_SUBSTITUTION}, {Tags.BAD_SUBSTITUTION}], + ), # For 1-n and n-1 cases see contraction 
and expansion tests ], ) @@ -119,8 +129,18 @@ def test_single_error_insertion( @pytest.mark.parametrize( "mt_tokens, pe_tokens, mt_pe_alignments, true_mt_tags", [ - (["A"], ["A", "X"], [(0, 0, 0.9), (None, 1, None)], [{Tags.OK, Tags.BAD_DELETION_RIGHT}]), - (["A"], ["X", "A"], [(None, 0, None), (0, 1, 0.9)], [{Tags.OK, Tags.BAD_DELETION_LEFT}]), + ( + ["A"], + ["A", "X"], + [(0, 0, 0.9), (None, 1, None)], + [{Tags.OK, Tags.BAD_DELETION_RIGHT}], + ), + ( + ["A"], + ["X", "A"], + [(None, 0, None), (0, 1, 0.9)], + [{Tags.OK, Tags.BAD_DELETION_LEFT}], + ), ( ["A", "B"], ["A", "X", "B"], @@ -361,7 +381,13 @@ class TestTagsToSource: "src_tokens, mt_tokens, mt_pe_alignments, mt_tags, true_src_tags", [ # ok cases - (["A", "B"], ["A", "B"], [(0, 0, 0.9), (1, 1, 0.9)], [{Tags.OK}, {Tags.OK}], [{Tags.OK}, {Tags.OK}]), + ( + ["A", "B"], + ["A", "B"], + [(0, 0, 0.9), (1, 1, 0.9)], + [{Tags.OK}, {Tags.OK}], + [{Tags.OK}, {Tags.OK}], + ), ( ["A", "B", "C", "D"], ["A", "B", "C", "D"], @@ -413,7 +439,13 @@ def test_one_to_one( @pytest.mark.parametrize( "src_tokens, mt_tokens, mt_pe_alignments, mt_tags, true_src_tags", [ - (["A"], ["A", "B"], [(0, 0, 0.9), (None, 1, None)], [{Tags.OK}, {Tags.BAD_SUBSTITUTION}], [{Tags.OK}]), + ( + ["A"], + ["A", "B"], + [(0, 0, 0.9), (None, 1, None)], + [{Tags.OK}, {Tags.BAD_SUBSTITUTION}], + [{Tags.OK}], + ), ( ["A", "B"], ["A", "B", "C"], From 2fdba177d59039d7ba82e85a08401824a7ca00ec Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Fri, 12 May 2023 13:32:24 +0200 Subject: [PATCH 15/23] feat: add deletions (None, j) to _fill_deleted_inserted_tokens --- divemt/qe_taggers/__init__.py | 4 +- divemt/qe_taggers/name_tbd_tagger.py | 56 +++++++++++---- tests/test_qe_taggers_name_tbd_tagger.py | 92 +++++++++++++++++++++--- 3 files changed, 125 insertions(+), 27 deletions(-) diff --git a/divemt/qe_taggers/__init__.py b/divemt/qe_taggers/__init__.py index b5ae9e7..11c5e27 100644 --- a/divemt/qe_taggers/__init__.py +++ 
b/divemt/qe_taggers/__init__.py @@ -1,11 +1,11 @@ -from .base import QETagger, TTag, TTag +from .base import QETagger, TTag, TAlignment from .name_tbd_tagger import NameTBDGeneralTags, NameTBDTagger from .wmt22_tagger import WMT22QETags, WMT22QETagger __all__ = [ "QETagger", "TTag", - "TTag", + "TAlignment", "NameTBDGeneralTags", "NameTBDTagger", "WMT22QETags", diff --git a/divemt/qe_taggers/name_tbd_tagger.py b/divemt/qe_taggers/name_tbd_tagger.py index a6cce6f..7f368b1 100644 --- a/divemt/qe_taggers/name_tbd_tagger.py +++ b/divemt/qe_taggers/name_tbd_tagger.py @@ -3,6 +3,8 @@ from itertools import groupby from typing import Generator, List, Optional, Set, Tuple, Union +import numpy as np + if sys.version_info < (3, 11): from strenum import StrEnum else: @@ -46,25 +48,51 @@ def __init__( else CustomSentenceAligner(model="bert", token_type="bpe", matching_methods="mai", return_similarity="avg") ) + @staticmethod def _fill_deleted_inserted_tokens( - self, len_from: int, len_to: int, alignments: List[TAlignment] + len_from: int, len_to: int, alignments: List[TAlignment] ) -> List[TAlignment]: - """As aligner provides only actual alignments, add required (None, i), (i, None) tokens""" + """ + As aligner provides only actual alignments, add required i, None), (None, j) tokens + * (i, None) just inserted in places to maintain order by i + * (None, j) inserted in estimated places + - if + """ new_alignments: List[TAlignment] = [] # Add (i, None) in correct place (ordered by i) - current_alignment_index = 0 + current_i_alignment_index = 0 for align in alignments: - # Add missing index pairs with None - while current_alignment_index < align[0]: - new_alignments.append((current_alignment_index, None)) - current_alignment_index += 1 + # Add missing index pairs before current one with (i, None) + while current_i_alignment_index < align[0]: + new_alignments.append((current_i_alignment_index, None, None)) + current_i_alignment_index += 1 # Add the current alignment pair 
new_alignments.append(align) - current_alignment_index += 1 - - raise NotImplementedError() + current_i_alignment_index += 1 + # add last (i, None) + while current_i_alignment_index < len_from: + new_alignments.append((current_i_alignment_index, None, None)) + current_i_alignment_index += 1 + + # Add (None, j) in correct places + missed_j_tokens = set(range(len_to)) - {j[1] for j in new_alignments} + for current_j_alignment_index in missed_j_tokens: + # select the closest (*, j) by j: obtain index in the list and j value + closest_value_index = min( + range(len(new_alignments)), + key=lambda i: abs(new_alignments[i][1] - current_j_alignment_index) if new_alignments[i][1] is not None else np.inf + ) + closest_value_j = new_alignments[closest_value_index][1] + # insert position of the (None, current_j_alignment_index) - before of after the closes value + if closest_value_j < current_j_alignment_index: + insert_index = closest_value_index + 1 + else: + insert_index = closest_value_index # - 1 + insert_index = max(0, min(insert_index, len(new_alignments))) + # insert it in right place + new_alignments.insert(insert_index, (None, current_j_alignment_index, None)) return new_alignments @@ -173,12 +201,12 @@ def tags_from_edits( """ # TODO: check. now - if embeddings are not provided, use Lev distance - mt_tags: List[List[Set[str]]] = [] + mt_tags: List[List[TTag]] = [] for mt_sent_tok, pe_sent_tok, mt_pe_sent_align in tqdm( zip(mt_tokens, pe_tokens, mt_pe_alignments), desc="Tagging MT", total=len(mt_tokens) ): - mt_sent_tags: List[Set[str]] = [set() for _ in range(len(mt_sent_tok))] + mt_sent_tags: List[TTag] = [set() for _ in range(len(mt_sent_tok))] # clear 1-n and n-1 nodes with low threshold # e.g. 
if 1-n or n-1 have same token or high similarity, remove low similarity as deletions/insertions @@ -310,12 +338,12 @@ def tags_to_source( - Copy tags from top match in MT and ignore other matches """ - src_tags: List[List[Set[str]]] = [] + src_tags: List[List[TTag]] = [] for src_sent_tok, _mt_sent_tok, mt_sent_tags, mt_pe_sent_align in tqdm( zip(src_tokens, mt_tokens, mt_tags, src_mt_alignments), desc="Transfer to source", total=len(src_tokens) ): - src_sent_tags: List[Set[str]] = [set() for _ in range(len(src_sent_tok))] + src_sent_tags: List[TTag] = [set() for _ in range(len(src_sent_tok))] # Solve all as 1-n matches for src_node_id, connected_mt_nodes_ids, connected_mt_similarity in NameTBDTagger._group_by_node( diff --git a/tests/test_qe_taggers_name_tbd_tagger.py b/tests/test_qe_taggers_name_tbd_tagger.py index c2149a1..d896095 100644 --- a/tests/test_qe_taggers_name_tbd_tagger.py +++ b/tests/test_qe_taggers_name_tbd_tagger.py @@ -8,6 +8,7 @@ else: from enum import StrEnum +from divemt.qe_taggers import TTag, TAlignment from divemt.qe_taggers.name_tbd_tagger import NameTBDGeneralTags as Tags from divemt.qe_taggers.name_tbd_tagger import NameTBDTagger @@ -36,13 +37,82 @@ class TestUtils: ], ) def test_detect_crossing_edges( - self, mt_len: int, mt_pe_alignments: List[Tuple[int, int]], true_mt_shifts_mask: List[bool] + self, mt_len: int, mt_pe_alignments: List[TAlignment], true_mt_shifts_mask: List[bool] ) -> None: mt_shifts_mask = tagger._detect_crossing_edges( [str(i) for i in range(mt_len)], [str(i) for i in range(mt_len)], mt_pe_alignments ) assert mt_shifts_mask == true_mt_shifts_mask + @pytest.mark.parametrize( + "mt_len, pe_len, mt_pe_alignments, true_mt_pe_alignments", + [ + # Nothing to add + ( + 3, + 3, + [(0, 0, 0.5), (1, 2, 0.5), (2, 1, 0.5)], + [(0, 0, 0.5), (1, 2, 0.5), (2, 1, 0.5)], + ), + ( + 3, + 3, + [(0, 2, 0.5), (1, 1, 0.5), (2, 0, 0.5)], + [(0, 2, 0.5), (1, 1, 0.5), (2, 0, 0.5)], + ), + # Add (i, None) - insertions + ( + 3, + 2, + [(0, 1, 
0.5), (2, 0, 0.5)], + [(0, 1, 0.5), (1, None, None), (2, 0, 0.5)], + ), + ( + 3, + 1, + [(0, 0, 0.5)], + [(0, 0, 0.5), (1, None, None), (2, None, None)], + ), + # Add (None, i) - deletions in the right places + ( + 2, + 3, + [(0, 0, 0.5), (1, 2, 0.5)], + [(0, 0, 0.5), (None, 1, None), (1, 2, 0.5)], + ), + ( + 2, + 4, + [(0, 0, 0.5), (1, 3, 0.5)], + [(0, 0, 0.5), (None, 1, None), (None, 2, None), (1, 3, 0.5)], + ), + ( + 1, + 3, + [(0, 0, 0.5)], + [(0, 0, 0.5), (None, 1, None), (None, 2, None)], + ), + ( + 2, + 4, + [(0, 0, 0.5), (1, 3, 0.5)], + [(0, 0, 0.5), (None, 1, None), (None, 2, None), (1, 3, 0.5)], + ), + # mixed insert/delete - first add (1, None), then (None, j) + ( + 2, + 3, + [(0, 0, 0.5)], + [(0, 0, 0.5), (None, 1, None), (None, 2, None), (1, None, None)], + ), + ] + ) + def test_fill_deleted_inserted_tokens( + self, mt_len: int, pe_len: int, mt_pe_alignments: List[TAlignment], true_mt_pe_alignments: List[TAlignment] + ) -> None: + filled_mt_pe_alignments = tagger._fill_deleted_inserted_tokens(mt_len, pe_len, mt_pe_alignments) + assert filled_mt_pe_alignments == true_mt_pe_alignments + class TestTagsFromEdits: @pytest.mark.parametrize( @@ -67,7 +137,7 @@ def test_single_error_ok( self, mt_tokens: List[str], pe_tokens: List[str], - mt_pe_alignments: List[Tuple[int, int]], + mt_pe_alignments: List[TAlignment], true_mt_tags: List[Set[StrEnum]], ) -> None: predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] @@ -97,7 +167,7 @@ def test_single_error_substitution( self, mt_tokens: List[str], pe_tokens: List[str], - mt_pe_alignments: List[Tuple[int, int]], + mt_pe_alignments: List[TAlignment], true_mt_tags: List[Set[StrEnum]], ) -> None: predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] @@ -118,7 +188,7 @@ def test_single_error_insertion( self, mt_tokens: List[str], pe_tokens: List[str], - mt_pe_alignments: List[Tuple[int, int]], + mt_pe_alignments: List[TAlignment], true_mt_tags: 
List[Set[StrEnum]], ) -> None: predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] @@ -181,7 +251,7 @@ def test_single_error_deletion( self, mt_tokens: List[str], pe_tokens: List[str], - mt_pe_alignments: List[Tuple[int, int]], + mt_pe_alignments: List[TAlignment], true_mt_tags: List[Set[StrEnum]], ) -> None: predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] @@ -249,7 +319,7 @@ def test_single_error_contraction( self, mt_tokens: List[str], pe_tokens: List[str], - mt_pe_alignments: List[Tuple[int, int]], + mt_pe_alignments: List[TAlignment], true_mt_tags: List[Set[str]], ) -> None: predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] @@ -314,7 +384,7 @@ def test_single_error_expansion( self, mt_tokens: List[str], pe_tokens: List[str], - mt_pe_alignments: List[Tuple[int, int]], + mt_pe_alignments: List[TAlignment], true_mt_tags: List[Set[str]], ) -> None: predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] @@ -367,7 +437,7 @@ def test_single_error_shifted( self, mt_tokens: List[str], pe_tokens: List[str], - mt_pe_alignments: List[Tuple[int, int]], + mt_pe_alignments: List[TAlignment], true_mt_tags: List[Set[StrEnum]], ) -> None: predicted_tags = tagger.tags_from_edits([mt_tokens], [pe_tokens], [mt_pe_alignments])[0] @@ -425,7 +495,7 @@ def test_one_to_one( self, src_tokens: List[str], mt_tokens: List[str], - mt_pe_alignments: List[Tuple[int, int]], + mt_pe_alignments: List[TAlignment], mt_tags: List[Set[StrEnum]], true_src_tags: List[Set[StrEnum]], ) -> None: @@ -459,7 +529,7 @@ def test_src_deleted( self, src_tokens: List[str], mt_tokens: List[str], - mt_pe_alignments: List[Tuple[int, int]], + mt_pe_alignments: List[TAlignment], mt_tags: List[Set[StrEnum]], true_src_tags: List[Set[StrEnum]], ) -> None: @@ -493,7 +563,7 @@ def test_mt_deleted( self, src_tokens: List[str], mt_tokens: List[str], - mt_pe_alignments: 
List[Tuple[int, int]], + mt_pe_alignments: List[TAlignment], mt_tags: List[Set[StrEnum]], true_src_tags: List[Set[StrEnum]], ) -> None: From 270491a8bfff8dd826a458d46c91be638e20c863 Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Fri, 12 May 2023 13:40:25 +0200 Subject: [PATCH 16/23] fix: make _fill_deleted_inserted_tokens use lists as input --- divemt/qe_taggers/name_tbd_tagger.py | 75 +++++++++++++----------- tests/test_qe_taggers_name_tbd_tagger.py | 5 +- 2 files changed, 43 insertions(+), 37 deletions(-) diff --git a/divemt/qe_taggers/name_tbd_tagger.py b/divemt/qe_taggers/name_tbd_tagger.py index 7f368b1..b6983f2 100644 --- a/divemt/qe_taggers/name_tbd_tagger.py +++ b/divemt/qe_taggers/name_tbd_tagger.py @@ -50,51 +50,56 @@ def __init__( @staticmethod def _fill_deleted_inserted_tokens( - len_from: int, len_to: int, alignments: List[TAlignment] - ) -> List[TAlignment]: + len_from_list: List[int], len_to_list: List[int], alignments_list: List[List[TAlignment]] + ) -> List[List[TAlignment]]: """ As aligner provides only actual alignments, add required i, None), (None, j) tokens * (i, None) just inserted in places to maintain order by i * (None, j) inserted in estimated places - if """ - new_alignments: List[TAlignment] = [] + full_new_alignments: List[List[TAlignment]] = [] + + for len_from, len_to, alignments in zip(len_from_list, len_to_list, alignments_list): + new_alignments: List[TAlignment] = [] - # Add (i, None) in correct place (ordered by i) - current_i_alignment_index = 0 - for align in alignments: - # Add missing index pairs before current one with (i, None) - while current_i_alignment_index < align[0]: + # Add (i, None) in correct place (ordered by i) + current_i_alignment_index = 0 + for align in alignments: + # Add missing index pairs before current one with (i, None) + while current_i_alignment_index < align[0]: + new_alignments.append((current_i_alignment_index, None, None)) + current_i_alignment_index += 1 + + # Add the current 
alignment pair + new_alignments.append(align) + current_i_alignment_index += 1 + # add last (i, None) + while current_i_alignment_index < len_from: new_alignments.append((current_i_alignment_index, None, None)) current_i_alignment_index += 1 - # Add the current alignment pair - new_alignments.append(align) - current_i_alignment_index += 1 - # add last (i, None) - while current_i_alignment_index < len_from: - new_alignments.append((current_i_alignment_index, None, None)) - current_i_alignment_index += 1 - - # Add (None, j) in correct places - missed_j_tokens = set(range(len_to)) - {j[1] for j in new_alignments} - for current_j_alignment_index in missed_j_tokens: - # select the closest (*, j) by j: obtain index in the list and j value - closest_value_index = min( - range(len(new_alignments)), - key=lambda i: abs(new_alignments[i][1] - current_j_alignment_index) if new_alignments[i][1] is not None else np.inf - ) - closest_value_j = new_alignments[closest_value_index][1] - # insert position of the (None, current_j_alignment_index) - before of after the closes value - if closest_value_j < current_j_alignment_index: - insert_index = closest_value_index + 1 - else: - insert_index = closest_value_index # - 1 - insert_index = max(0, min(insert_index, len(new_alignments))) - # insert it in right place - new_alignments.insert(insert_index, (None, current_j_alignment_index, None)) - - return new_alignments + # Add (None, j) in correct places + missed_j_tokens = set(range(len_to)) - {j[1] for j in new_alignments} + for current_j_alignment_index in missed_j_tokens: + # select the closest (*, j) by j: obtain index in the list and j value + closest_value_index = min( + range(len(new_alignments)), + key=lambda i: abs(new_alignments[i][1] - current_j_alignment_index) if new_alignments[i][1] is not None else np.inf + ) + closest_value_j = new_alignments[closest_value_index][1] + # insert position of the (None, current_j_alignment_index) - before of after the closes value + if 
closest_value_j < current_j_alignment_index: + insert_index = closest_value_index + 1 + else: + insert_index = closest_value_index # - 1 + insert_index = max(0, min(insert_index, len(new_alignments))) + # insert it in right place + new_alignments.insert(insert_index, (None, current_j_alignment_index, None)) + + full_new_alignments.append(new_alignments) + + return full_new_alignments @CacheDecorator() def align_source_mt( diff --git a/tests/test_qe_taggers_name_tbd_tagger.py b/tests/test_qe_taggers_name_tbd_tagger.py index d896095..e2cef1d 100644 --- a/tests/test_qe_taggers_name_tbd_tagger.py +++ b/tests/test_qe_taggers_name_tbd_tagger.py @@ -110,8 +110,9 @@ def test_detect_crossing_edges( def test_fill_deleted_inserted_tokens( self, mt_len: int, pe_len: int, mt_pe_alignments: List[TAlignment], true_mt_pe_alignments: List[TAlignment] ) -> None: - filled_mt_pe_alignments = tagger._fill_deleted_inserted_tokens(mt_len, pe_len, mt_pe_alignments) - assert filled_mt_pe_alignments == true_mt_pe_alignments + filled_mt_pe_alignments = tagger._fill_deleted_inserted_tokens([mt_len], [pe_len], [mt_pe_alignments]) + for pred_alignments, true_alignments in zip(filled_mt_pe_alignments, [true_mt_pe_alignments]): + assert pred_alignments == true_alignments class TestTagsFromEdits: From 1734e313894f73b50f7e8750ef0e0d6c9fd8858e Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Fri, 12 May 2023 16:11:48 +0200 Subject: [PATCH 17/23] fix: _fill_deleted_inserted_tokens insertions error --- divemt/qe_taggers/name_tbd_tagger.py | 27 +++++++++++++++++------- tests/test_qe_taggers_name_tbd_tagger.py | 18 ++++++++++++++++ 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/divemt/qe_taggers/name_tbd_tagger.py b/divemt/qe_taggers/name_tbd_tagger.py index b6983f2..25de0df 100644 --- a/divemt/qe_taggers/name_tbd_tagger.py +++ b/divemt/qe_taggers/name_tbd_tagger.py @@ -73,7 +73,8 @@ def _fill_deleted_inserted_tokens( # Add the current alignment pair 
new_alignments.append(align) - current_i_alignment_index += 1 + if align[0] == current_i_alignment_index: + current_i_alignment_index += 1 # add last (i, None) while current_i_alignment_index < len_from: new_alignments.append((current_i_alignment_index, None, None)) @@ -318,6 +319,9 @@ def tags_from_edits( assert all( len(mt_sent_tokens) == len(mt_sent_tags) for mt_sent_tokens, mt_sent_tags in zip(mt_tokens, mt_tags) ), "MT tags creation failed, number of tokens and tags do not match" + assert all( + len(tags) > 0 for mt_sent_tags in mt_tags for tags in mt_sent_tags + ), "At least 1 tag in the set should be present for each token" return mt_tags @staticmethod @@ -350,13 +354,18 @@ def tags_to_source( ): src_sent_tags: List[TTag] = [set() for _ in range(len(src_sent_tok))] + # Filter all (i, None), (None, j) + cleared_mt_pe_sent_align = [ + alignment + for alignment in mt_pe_sent_align + if alignment[0] is not None and alignment[1] is not None + ] + # Solve all as 1-n matches for src_node_id, connected_mt_nodes_ids, connected_mt_similarity in NameTBDTagger._group_by_node( - mt_pe_sent_align, by_start_node=True, sort=True + cleared_mt_pe_sent_align, by_start_node=True, sort=True ): - if src_node_id is None: - continue - elif len(connected_mt_nodes_ids) == 0: + if len(connected_mt_nodes_ids) == 0: continue elif len(connected_mt_nodes_ids) > 1: # n-1 match, find best match @@ -370,9 +379,6 @@ def tags_to_source( else: # copy tags from best match src_sent_tags[src_node_id].update(mt_sent_tags[best_mt_node_id]) - elif connected_mt_nodes_ids[0] is None: - # nothing to copy from MT - continue else: # 1-1 match, copy tags src_sent_tags[src_node_id].update(mt_sent_tags[connected_mt_nodes_ids[0]]) @@ -400,6 +406,11 @@ def generate_tags( src_mt_alignments = self.align_source_mt(src_tokens, mt_tokens, src_langs, tgt_langs) mt_pe_alignments = self.align_mt_pe(mt_tokens, pe_tokens, tgt_langs) + mt_pe_alignments = self._fill_deleted_inserted_tokens( + [len(i) for i in mt_tokens], 
+ [len(i) for i in pe_tokens], + mt_pe_alignments, + ) mt_tags = self.tags_from_edits(mt_tokens, pe_tokens, mt_pe_alignments) src_tags = self.tags_to_source(src_tokens, pe_tokens, src_mt_alignments, mt_tags) diff --git a/tests/test_qe_taggers_name_tbd_tagger.py b/tests/test_qe_taggers_name_tbd_tagger.py index e2cef1d..043a824 100644 --- a/tests/test_qe_taggers_name_tbd_tagger.py +++ b/tests/test_qe_taggers_name_tbd_tagger.py @@ -73,6 +73,18 @@ def test_detect_crossing_edges( [(0, 0, 0.5)], [(0, 0, 0.5), (1, None, None), (2, None, None)], ), + ( + 6, + 3, + [(0, 0, 0.5), (2, 0, 0.5), (3, 1, 0.5), (5, 2, 0.5), (6, 0, 0.5)], + [(0, 0, 0.5), (1, None, None), (2, 0, 0.5), (3, 1, 0.5), (4, None, None), (5, 2, 0.5), (6, 0, 0.5)], + ), + ( + 3, + 3, + [(0, 0, 0.5), (0, 1, 0.5), (0, 2, 0.5), (2, 0, 0.5)], + [(0, 0, 0.5), (0, 1, 0.5), (0, 2, 0.5), (1, None, None), (2, 0, 0.5)], + ), # Add (None, i) - deletions in the right places ( 2, @@ -105,6 +117,12 @@ def test_detect_crossing_edges( [(0, 0, 0.5)], [(0, 0, 0.5), (None, 1, None), (None, 2, None), (1, None, None)], ), + ( + 11, + 11, + [(0, 0, 0.5), (1, 1, 0.5), (2, 2, 0.5), (4, 4, 0.5), (5, 5, 0.5), (5, 6, 0.5), (6, 7, 0.5), (7, 8, 0.5), (8, 9, 0.5), (10, 10, 0.5)], + [(0, 0, 0.5), (1, 1, 0.5), (2, 2, 0.5), (None, 3, None), (3, None, None), (4, 4, 0.5), (5, 5, 0.5), (5, 6, 0.5), (6, 7, 0.5), (7, 8, 0.5), (8, 9, 0.5), (9, None, None), (10, 10, 0.5)], + ), ] ) def test_fill_deleted_inserted_tokens( From 5aedb48641cbb8deb67712ffd95a23325a8d00a1 Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Fri, 12 May 2023 16:17:13 +0200 Subject: [PATCH 18/23] style: apply black --- divemt/qe_taggers/__init__.py | 4 ++-- divemt/qe_taggers/name_tbd_tagger.py | 16 ++++++++-------- divemt/qe_taggers/wmt22_tagger.py | 2 +- tests/test_qe_taggers_name_tbd_tagger.py | 24 +++++++++++++++++------- 4 files changed, 28 insertions(+), 18 deletions(-) diff --git a/divemt/qe_taggers/__init__.py b/divemt/qe_taggers/__init__.py index 
11c5e27..887c76b 100644 --- a/divemt/qe_taggers/__init__.py +++ b/divemt/qe_taggers/__init__.py @@ -1,6 +1,6 @@ -from .base import QETagger, TTag, TAlignment +from .base import QETagger, TAlignment, TTag from .name_tbd_tagger import NameTBDGeneralTags, NameTBDTagger -from .wmt22_tagger import WMT22QETags, WMT22QETagger +from .wmt22_tagger import WMT22QETagger, WMT22QETags __all__ = [ "QETagger", diff --git a/divemt/qe_taggers/name_tbd_tagger.py b/divemt/qe_taggers/name_tbd_tagger.py index 25de0df..64e64c6 100644 --- a/divemt/qe_taggers/name_tbd_tagger.py +++ b/divemt/qe_taggers/name_tbd_tagger.py @@ -12,9 +12,9 @@ from tqdm import tqdm from ..cache_utils import CacheDecorator -from .custom_simalign import SentenceAligner as CustomSentenceAligner from ..parse_utils import clear_nlp_cache from .base import QETagger, TAlignment, TTag +from .custom_simalign import SentenceAligner as CustomSentenceAligner logger = logging.getLogger(__name__) @@ -86,7 +86,11 @@ def _fill_deleted_inserted_tokens( # select the closest (*, j) by j: obtain index in the list and j value closest_value_index = min( range(len(new_alignments)), - key=lambda i: abs(new_alignments[i][1] - current_j_alignment_index) if new_alignments[i][1] is not None else np.inf + key=lambda i: ( + abs(new_alignments[i][1] - current_j_alignment_index) + if new_alignments[i][1] is not None + else np.inf + ), ) closest_value_j = new_alignments[closest_value_index][1] # insert position of the (None, current_j_alignment_index) - before of after the closes value @@ -142,9 +146,7 @@ def _group_by_node( ], [similarity for _, _, similarity in connected_alignments] @staticmethod - def _detect_crossing_edges( - mt_tokens: List[str], pe_tokens: List[str], alignments: List[TAlignment] - ) -> List[bool]: + def _detect_crossing_edges(mt_tokens: List[str], pe_tokens: List[str], alignments: List[TAlignment]) -> List[bool]: """Detect crossing edges in the alignments. 
Return mask list of nodes that cross some other node.""" # TODO: optimize from n^2 to n as 2 pointers shifted_mt_mask = [False] * len(mt_tokens) @@ -356,9 +358,7 @@ def tags_to_source( # Filter all (i, None), (None, j) cleared_mt_pe_sent_align = [ - alignment - for alignment in mt_pe_sent_align - if alignment[0] is not None and alignment[1] is not None + alignment for alignment in mt_pe_sent_align if alignment[0] is not None and alignment[1] is not None ] # Solve all as 1-n matches diff --git a/divemt/qe_taggers/wmt22_tagger.py b/divemt/qe_taggers/wmt22_tagger.py index 3735ec7..15b36d9 100644 --- a/divemt/qe_taggers/wmt22_tagger.py +++ b/divemt/qe_taggers/wmt22_tagger.py @@ -14,10 +14,10 @@ from simalign import SentenceAligner from tqdm import tqdm +from ..cache_utils import CacheDecorator from ..parse_utils import clear_nlp_cache from .base import QETagger, TAlignment, TTag from .wmt22qe_utils import align_sentence_tercom, parse_tercom_xml_file -from ..cache_utils import CacheDecorator logger = logging.getLogger(__name__) diff --git a/tests/test_qe_taggers_name_tbd_tagger.py b/tests/test_qe_taggers_name_tbd_tagger.py index 043a824..cefaa65 100644 --- a/tests/test_qe_taggers_name_tbd_tagger.py +++ b/tests/test_qe_taggers_name_tbd_tagger.py @@ -1,5 +1,5 @@ import sys -from typing import List, Set, Tuple +from typing import List, Set import pytest @@ -8,7 +8,7 @@ else: from enum import StrEnum -from divemt.qe_taggers import TTag, TAlignment +from divemt.qe_taggers import TAlignment from divemt.qe_taggers.name_tbd_tagger import NameTBDGeneralTags as Tags from divemt.qe_taggers.name_tbd_tagger import NameTBDTagger @@ -118,12 +118,22 @@ def test_detect_crossing_edges( [(0, 0, 0.5), (None, 1, None), (None, 2, None), (1, None, None)], ), ( - 11, - 11, - [(0, 0, 0.5), (1, 1, 0.5), (2, 2, 0.5), (4, 4, 0.5), (5, 5, 0.5), (5, 6, 0.5), (6, 7, 0.5), (7, 8, 0.5), (8, 9, 0.5), (10, 10, 0.5)], - [(0, 0, 0.5), (1, 1, 0.5), (2, 2, 0.5), (None, 3, None), (3, None, None), (4, 4, 0.5), 
(5, 5, 0.5), (5, 6, 0.5), (6, 7, 0.5), (7, 8, 0.5), (8, 9, 0.5), (9, None, None), (10, 10, 0.5)], + 7, + 7, + [(0, 0, 0.5), (2, 2, 0.5), (3, 3, 0.5), (3, 4, 0.5), (4, 5, 0.5), (6, 6, 0.5)], + [ + (0, 0, 0.5), + (None, 1, None), + (1, None, None), + (2, 2, 0.5), + (3, 3, 0.5), + (3, 4, 0.5), + (4, 5, 0.5), + (5, None, None), + (6, 6, 0.5), + ], ), - ] + ], ) def test_fill_deleted_inserted_tokens( self, mt_len: int, pe_len: int, mt_pe_alignments: List[TAlignment], true_mt_pe_alignments: List[TAlignment] From e577f92fc3945fa81a8e3d4865c1b6615ba6a753 Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Sun, 27 Aug 2023 17:31:18 +0200 Subject: [PATCH 19/23] feat: optimize simalign to load models faster (much faster) --- divemt/qe_taggers/custom_simalign.py | 103 +++++++++++++++++++-------- 1 file changed, 73 insertions(+), 30 deletions(-) diff --git a/divemt/qe_taggers/custom_simalign.py b/divemt/qe_taggers/custom_simalign.py index 579c402..67ebadd 100644 --- a/divemt/qe_taggers/custom_simalign.py +++ b/divemt/qe_taggers/custom_simalign.py @@ -31,43 +31,78 @@ XLMRobertaModel, XLMRobertaTokenizer, XLMTokenizer, + PreTrainedTokenizer, + PreTrainedModel, ) +_LOADED_MODELS: Dict[str, Tuple[PreTrainedModel, PreTrainedTokenizer]] = {} + + class EmbeddingLoader: - def __init__(self, model: str = "bert-base-multilingual-cased", device=torch.device("cpu"), layer: int = 8): - TR_Models = { - "bert-base-uncased": (BertModel, BertTokenizer), - "bert-base-multilingual-cased": (BertModel, BertTokenizer), - "bert-base-multilingual-uncased": (BertModel, BertTokenizer), - "xlm-mlm-100-1280": (XLMModel, XLMTokenizer), - "roberta-base": (RobertaModel, RobertaTokenizer), - "xlm-roberta-base": (XLMRobertaModel, XLMRobertaTokenizer), - "xlm-roberta-large": (XLMRobertaModel, XLMRobertaTokenizer), - } + TR_MODELS = { + "bert-base-uncased": (BertModel, BertTokenizer), + "bert-base-multilingual-cased": (BertModel, BertTokenizer), + "bert-base-multilingual-uncased": (BertModel, 
BertTokenizer), + "xlm-mlm-100-1280": (XLMModel, XLMTokenizer), + "roberta-base": (RobertaModel, RobertaTokenizer), + "xlm-roberta-base": (XLMRobertaModel, XLMRobertaTokenizer), + "xlm-roberta-large": (XLMRobertaModel, XLMRobertaTokenizer), + } + def __init__( + self, + model: str = "bert-base-multilingual-cased", + device="cpu", + layer: int = 8, + lazy_loading: bool = True, + ): self.model = model self.device = device self.layer = layer - self.emb_model = None - self.tokenizer = None - - if model in TR_Models: - model_class, tokenizer_class = TR_Models[model] - self.emb_model = model_class.from_pretrained(model, output_hidden_states=True) - self.emb_model.eval() - self.emb_model.to(self.device) - self.tokenizer = tokenizer_class.from_pretrained(model) + self.lazy_loading = lazy_loading + self._emb_model = None + self._tokenizer = None + + if not self.lazy_loading: + self._load_model() + + @property + def tokenizer(self) -> PreTrainedTokenizer: + if self.lazy_loading and self._tokenizer is None: + self._load_model() + return self._tokenizer + + @property + def emb_model(self) -> PreTrainedModel: + if self.lazy_loading and self._emb_model is None: + self._load_model() + return self._emb_model + + def _load_model(self) -> None: + if self.model in _LOADED_MODELS: + self._emb_model, self._tokenizer = _LOADED_MODELS[self.model] else: - # try to load model with auto-classes - config = AutoConfig.from_pretrained(model, output_hidden_states=True) - self.emb_model = AutoModel.from_pretrained(model, config=config) - self.emb_model.eval() - self.emb_model.to(self.device) - self.tokenizer = AutoTokenizer.from_pretrained(model) + if self.model in self.TR_MODELS: + model_class, tokenizer_class = self.TR_MODELS[self.model] + self._emb_model = model_class.from_pretrained(self.model, output_hidden_states=True) + self._tokenizer = tokenizer_class.from_pretrained(self.model) + else: + # try to load model with auto-classes + config = AutoConfig.from_pretrained(self.model, 
output_hidden_states=True) + self._emb_model = AutoModel.from_pretrained(self.model, config=config) + self._tokenizer = AutoTokenizer.from_pretrained(self.model) + _LOADED_MODELS[self.model] = (self._emb_model, self._tokenizer) + + self._emb_model.eval() + # self._emb_model.half() + self._emb_model.to(self.device) def get_embed_list(self, sent_batch: List[List[str]]) -> torch.Tensor: - if self.emb_model is not None: + if self.lazy_loading and self._emb_model is None: + self._load_model() + + if self._emb_model is not None: with torch.no_grad(): if not isinstance(sent_batch[0], str): inputs = self.tokenizer( @@ -77,7 +112,10 @@ def get_embed_list(self, sent_batch: List[List[str]]) -> torch.Tensor: inputs = self.tokenizer( sent_batch, is_split_into_words=False, padding=True, truncation=True, return_tensors="pt" ) - hidden = self.emb_model(**inputs.to(self.device))["hidden_states"] + + # with torch.autocast(device_type=self.device, dtype=torch.bfloat16 if self.device == 'cpu' else torch.float16): + # hidden = self.emb_model(**inputs.to(self.device))["hidden_states"] + hidden = self._emb_model(**inputs.to(self.device))["hidden_states"] if self.layer >= len(hidden): raise ValueError( f"Specified to take embeddings from layer {self.layer}, but model has only" @@ -101,8 +139,12 @@ def __init__( ] = None, # new: ["max", "avg"] type of average similarity for words from tokens device: str = "cpu", layer: int = 8, + lazy_loading: bool = True, ): - model_names = {"bert": "bert-base-multilingual-cased", "xlmr": "xlm-roberta-base"} + model_names = { + "bert": "bert-base-multilingual-cased", + "xlmr": "xlm-roberta-base" + } all_matching_methods = {"a": "inter", "m": "mwmf", "i": "itermax", "f": "fwd", "r": "rev"} self.model = model @@ -112,9 +154,10 @@ def __init__( self.distortion = distortion self.matching_methods = [all_matching_methods[m] for m in matching_methods] self.return_similarity = return_similarity - self.device = torch.device(device) + self.device = device + 
self.lazy_loading = lazy_loading - self.embed_loader = EmbeddingLoader(model=self.model, device=self.device, layer=layer) + self.embed_loader = EmbeddingLoader(model=self.model, device=self.device, layer=layer, lazy_loading=lazy_loading) @staticmethod def get_max_weight_match(sim: np.ndarray) -> np.ndarray: From 01092bb3f3dbcce4344f921d2bccdbfb608b889b Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Sun, 27 Aug 2023 17:32:07 +0200 Subject: [PATCH 20/23] feat: save alignments --- divemt/parse_utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/divemt/parse_utils.py b/divemt/parse_utils.py index 8d15935..a30c22f 100644 --- a/divemt/parse_utils.py +++ b/divemt/parse_utils.py @@ -351,7 +351,7 @@ def texts2qe( ) -> pd.DataFrame: """Add quality tags to a dataframe.""" pe_texts = data.copy()[data.mt_text.notnull()] - src_tags, mt_tags = tagger.generate_tags( + src_tags, mt_tags, src_mt_alignments, mt_pe_alignments = tagger.generate_tags( pe_texts["src_text"].tolist(), pe_texts["mt_text"].tolist(), pe_texts["tgt_text"].tolist(), @@ -361,6 +361,10 @@ def texts2qe( pe_texts[f"src_{tagger.ID}"] = src_tags pe_texts[f"mt_{tagger.ID}"] = mt_tags pe_texts = pe_texts[["unit_id", f"src_{tagger.ID}", f"mt_{tagger.ID}"]] + if src_mt_alignments: + pe_texts[f"src_mt_{tagger.ID}_alignments"] = src_mt_alignments + if mt_pe_alignments: + pe_texts[f"mt_pe_{tagger.ID}_alignments"] = mt_pe_alignments data = data.join(pe_texts.set_index("unit_id"), on="unit_id") return data @@ -409,7 +413,7 @@ def parse_from_folder( if add_annotations_information: texts_df = texts2annotations(texts_df) # TODO: make cache optional if add_wmt22_quality_tags: - tagger = WMT22QETagger() + tagger = WMT22QETagger() # TODO: make cache optional texts_df = texts2qe(texts_df, tagger) if add_name_tbd_quality_tags: tagger = NameTBDTagger() # TODO: make cache optional From b53d28c601ed196b5ea4289df14ad0abe5484496 Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Sun, 27 
Aug 2023 17:33:06 +0200 Subject: [PATCH 21/23] fix: add some typings, fix cache for ald tags --- divemt/cache_utils.py | 8 ++++++-- divemt/qe_taggers/base.py | 2 +- divemt/qe_taggers/name_tbd_tagger.py | 20 +++++++++++--------- divemt/qe_taggers/wmt22_tagger.py | 10 +++++----- pyproject.toml | 2 ++ 5 files changed, 25 insertions(+), 17 deletions(-) diff --git a/divemt/cache_utils.py b/divemt/cache_utils.py index c1af5be..08571aa 100644 --- a/divemt/cache_utils.py +++ b/divemt/cache_utils.py @@ -95,8 +95,9 @@ def calc_args_hash(*args: Any, **kwargs: any) -> bytes: class CacheDecorator: - def __init__(self, cache_dir: Optional[Path] = None, version: int = 0): + def __init__(self, cache_dir: Optional[Path] = None, version: int = 0, name: Optional[str] = None): self.version = version + self.name = name or '' self.cache_dir = cache_dir or Path(".cache") @staticmethod @@ -104,11 +105,14 @@ def _is_bound_method(function: Callable, arg: Any): return inspect.ismethod(function) or (hasattr(arg, "__class__") and function.__name__ in dir(arg.__class__)) def __call__(self, function: Callable) -> Any: + cached_function_name = function.__qualname__.replace(".", "_") + if self.name: + cached_function_name = cached_function_name + "_" + self.name @functools.wraps(function) def wrapper(*args: Any, **kwargs: Any) -> Any: cache_key_args = args[1:] if self._is_bound_method(function, args[0]) else args hash_val = calc_args_hash(*cache_key_args, **kwargs) - cache_file = self.cache_dir / f"{function.__name__}_v{self.version}_{hash_val.hex()}.pkl" + cache_file = self.cache_dir / f"{cached_function_name}_v{self.version}_{hash_val.hex()}.pkl" # TODO: add logging, not printing diff --git a/divemt/qe_taggers/base.py b/divemt/qe_taggers/base.py index cd4b095..bb631a0 100644 --- a/divemt/qe_taggers/base.py +++ b/divemt/qe_taggers/base.py @@ -80,7 +80,7 @@ def generate_tags( pes: List[str], src_langs: Union[str, List[str]], tgt_langs: Union[str, List[str]], - ) -> Tuple[List[TTag], List[TTag]]: + 
) -> Tuple[List[TTag], List[TTag], List[TAlignment], List[TAlignment]]: """Generate word-level quality estimation tags from source-mt-pe triplets. Args: diff --git a/divemt/qe_taggers/name_tbd_tagger.py b/divemt/qe_taggers/name_tbd_tagger.py index 64e64c6..3c4cece 100644 --- a/divemt/qe_taggers/name_tbd_tagger.py +++ b/divemt/qe_taggers/name_tbd_tagger.py @@ -42,10 +42,12 @@ def __init__( self, aligner: Optional[CustomSentenceAligner] = None, ): + # TODO: check with xlmr amth other trained with semanting sim + # version 0 for bert and 1 for xlmr self.aligner = ( aligner if aligner - else CustomSentenceAligner(model="bert", token_type="bpe", matching_methods="mai", return_similarity="avg") + else CustomSentenceAligner(model="xlmr", token_type="bpe", matching_methods="mai", return_similarity="avg") ) @staticmethod @@ -106,7 +108,7 @@ def _fill_deleted_inserted_tokens( return full_new_alignments - @CacheDecorator() + @CacheDecorator(version=0, name="xlmr") def align_source_mt( self, src_tokens: List[List[str]], @@ -119,7 +121,7 @@ def align_source_mt( for src_tok, mt_tok in tqdm(zip(src_tokens, mt_tokens), total=len(src_tokens), desc="Aligning src-mt") ] - @CacheDecorator() + @CacheDecorator(version=0, name="xlmr") def align_mt_pe( self, mt_tokens: List[List[str]], @@ -187,11 +189,11 @@ def tags_from_edits( The following situations are considered: 1:1 match: OK if same, SUB if different 1:n match: - - Obtain similarity between 1 and n (lexical, LaBSE if not found) + - Obtain similarity between 1 and n (align scores, TODO: lexical if not found) - If all matches are threshold, tag as CON (contraction) - Else, tackle the highest match as 1:1 (OK/SUB) and the rest as None:1 (deletions) n:1 match: - - Obtain similarity between n and 1 (lexical, LaBSE if not found) + - Obtain similarity between n and 1 (align scores, TODO: lexical if not found) - If all matches are threshold, tag as EXP (expansion) - Else, tackle the highest match as 1:1 (OK/SUB) and the rest as 1:None 
(insertions) n:m match: @@ -339,12 +341,12 @@ def tags_to_source( The following cases are considered: 1:1 match: copy tags from MT 1:n match: - - Find highest match for 1 in n (lexical, LaBSE if not found) + - Find highest match for 1 in n (align scores, TODO: lexical if not found) - If all matches are threshold, TBD - Else, copy tags from top match in MT and ignore other matches n:1 match: copy tags from 1 to all n n:m match: - - For each 1 in n, find highest match for 1 in m (lexical, LaBSE if not found) + - For each 1 in n, find highest match for 1 in m (align scores, TODO: lexical if not found) - If all matches are threshold, ignore and continue - Copy tags from top match in MT and ignore other matches """ @@ -399,7 +401,7 @@ def generate_tags( pes: List[str], src_langs: Union[str, List[Set[str]]], tgt_langs: Union[str, List[Set[str]]], - ) -> Tuple[List[TTag], List[TTag]]: + ) -> Tuple[List[TTag], List[TTag], List[TAlignment], List[TAlignment]]: src_tokens, src_langs = self.get_tokenized(srcs, src_langs) mt_tokens, tgt_langs = self.get_tokenized(mts, tgt_langs) pe_tokens, _ = self.get_tokenized(pes, tgt_langs) @@ -417,4 +419,4 @@ def generate_tags( clear_nlp_cache() - return src_tags, mt_tags + return src_tags, mt_tags, src_mt_alignments, mt_pe_alignments diff --git a/divemt/qe_taggers/wmt22_tagger.py b/divemt/qe_taggers/wmt22_tagger.py index 15b36d9..885bc28 100644 --- a/divemt/qe_taggers/wmt22_tagger.py +++ b/divemt/qe_taggers/wmt22_tagger.py @@ -11,13 +11,13 @@ from strenum import StrEnum else: from enum import StrEnum -from simalign import SentenceAligner from tqdm import tqdm from ..cache_utils import CacheDecorator from ..parse_utils import clear_nlp_cache from .base import QETagger, TAlignment, TTag from .wmt22qe_utils import align_sentence_tercom, parse_tercom_xml_file +from .custom_simalign import SentenceAligner logger = logging.getLogger(__name__) @@ -64,7 +64,7 @@ def __init__( self.tercom_out = Path(tercom_out) if tercom_out is not None else 
self.tmp_dir / "tercom" self.tercom_path = tercom_path if tercom_path is not None else "scripts/tercom.7.25.jar" - @CacheDecorator() + @CacheDecorator(version=0, name="xlmr") def align_source_pe( self, src_tokens: List[List[str]], @@ -80,7 +80,7 @@ def align_source_pe( ) ] - @CacheDecorator() + @CacheDecorator(version=0, name="") def align_mt_pe( self, mt_tokens: List[List[str]], @@ -293,7 +293,7 @@ def generate_tags( use_gaps: bool = False, omissions: str = OmissionRule.RIGHT.value, fluency_rule: str = FluencyRule.NORMAL.value, - ) -> Tuple[List[List[TTag]], List[List[TTag]]]: + ) -> Tuple[List[List[TTag]], List[List[TTag]], List[TAlignment], List[TAlignment]]: src_tokens, src_langs = self.get_tokenized(srcs, src_langs) mt_tokens, tgt_langs = self.get_tokenized(mts, tgt_langs) pe_tokens, _ = self.get_tokenized(pes, tgt_langs) @@ -313,4 +313,4 @@ def generate_tags( clear_nlp_cache() - return src_tags, mt_tags + return src_tags, mt_tags, None, None diff --git a/pyproject.toml b/pyproject.toml index e8b0f4a..714d15a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,9 @@ dependencies = [ "numpy<1.19.5", # as simalign is not compatible with numpy >=1.20.0 (np.int is deprecated), 1.19.5 vulnerable "pandas", "sacrebleu", + "tokenizers", "Levenshtein", + "astred[stanza]", "stanza", "simalign", "strenum", From a90764cc115d3fb46c85445a461b43817e3274b9 Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Sun, 27 Aug 2023 18:23:35 +0200 Subject: [PATCH 22/23] feat: add analyze notebooks --- notebooks/fine-grained-analysis.ipynb | 1274 +++++++++++++++++++++++++ notebooks/qe_visualize.ipynb | 490 ++++++++++ 2 files changed, 1764 insertions(+) create mode 100644 notebooks/fine-grained-analysis.ipynb create mode 100644 notebooks/qe_visualize.ipynb diff --git a/notebooks/fine-grained-analysis.ipynb b/notebooks/fine-grained-analysis.ipynb new file mode 100644 index 0000000..6236796 --- /dev/null +++ b/notebooks/fine-grained-analysis.ipynb @@ -0,0 +1,1274 @@ +{ + 
"cells": [ + { + "cell_type": "markdown", + "source": [ + "## Imports" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 1, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r\n", + "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip available: \u001B[0m\u001B[31;49m22.3.1\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m23.2.1\u001B[0m\r\n", + "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\r\n" + ] + } + ], + "source": [ + "!pip install -q seaborn pandas " + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "start_time": "2023-08-27T13:14:02.233295Z" + } + } + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import ast\n", + "from collections import defaultdict\n", + "\n", + "from tqdm import tqdm\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "from stanza.models.common.doc import Sentence as StanzaSentence, Word as StanzaWord, Token as StanzaToken\n", + "from astred import Sentence, AlignedSentences" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-27T13:14:08.022332Z", + "start_time": "2023-08-27T13:14:07.457666Z" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings(\"error\")" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Load data" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [ + { + "data": { + "text/plain": "True" + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], 
+ "source": [ + "DATASET_FOLDER = Path() / '..' / 'data' / 'processed'\n", + "MERGED_FOLDER = DATASET_FOLDER / 'merged'\n", + "MERGED_FOLDER.exists()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-27T13:14:08.053094Z", + "start_time": "2023-08-27T13:14:08.034709Z" + } + } + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [ + { + "data": { + "text/plain": " src_text \\\nunit_id \nflores101-main-ukr-66-pe2-3 The qualities that determine a subculture as d... \nflores101-main-nld-9-pe2-4 A course will normally be from 2-5 days and wi... \nflores101-main-ara-41-pe2-3 For example, one might say that the motor car ... \nflores101-main-ita-3-pe1-3 Most modern research telescopes are enormous f... \nflores101-main-ukr-98-pe1-4 Searches at security checkpoints have also bec... \nflores101-main-vie-19-pe1-1 South Africa have defeated the All Blacks (New... \nflores101-main-ara-102-ht-4 Some cruises feature Berlin, Germany in the br... \nflores101-main-ukr-31-ht-2 It is still produced today, but more important... \nflores101-main-ukr-9-pe2-5 Books and magazines dealing with wilderness su... \nflores101-main-tur-54-ht-1 Murray lost the first set in a tie break after... \nflores101-main-tur-53-ht-5 They all ran back from where the accident had ... \nflores101-main-ukr-16-ht-3 An up-bow usually generates a softer sound, wh... \nflores101-main-tur-68-pe1-4 He was initially hospitalised in the James Pag... \nflores101-main-tur-42-pe1-4 The presence of a true “invisible team” (Larso... \nflores101-main-nld-39-pe2-1 After its adoption by Congress on July 4, a ha... \nflores101-main-vie-80-ht-2 Accepted were Aristotle's views on all matters... \nflores101-main-vie-29-ht-1 The satellites, both of which weighed in exces... \nflores101-main-nld-91-ht-1 The use of video recording has led to importan... \nflores101-main-vie-42-pe2-2 Virtual team members often function as the poi... 
\nflores101-main-ukr-44-ht-2 The definition has geographic variations, wher... \n\n mt_text \\\nunit_id \nflores101-main-ukr-66-pe2-3 Якістю, яка визначає субкультуру як індивідуал... \nflores101-main-nld-9-pe2-4 Een cursus zal normaal van 2-5 dagen zijn en r... \nflores101-main-ara-41-pe2-3 على سبيل المثال، يمكن القول أن السيارة النارية... \nflores101-main-ita-3-pe1-3 I telescopi di ricerca più moderni sono enormi... \nflores101-main-ukr-98-pe1-4 Обшуки на блокпостах безпеки також стали набаг... \nflores101-main-vie-19-pe1-1 Nam Phi đã đánh bại All Blacks (New Zealand) t... \nflores101-main-ara-102-ht-4 NaN \nflores101-main-ukr-31-ht-2 NaN \nflores101-main-ukr-9-pe2-5 Бібліотеки та журнали про виживання в дикій пр... \nflores101-main-tur-54-ht-1 NaN \nflores101-main-tur-53-ht-5 NaN \nflores101-main-ukr-16-ht-3 NaN \nflores101-main-tur-68-pe1-4 Başlangıçta Great Yarmouth'daki James Paget Ha... \nflores101-main-tur-42-pe1-4 Gerçek bir “görünmez ekibin” varlığı (Larson v... \nflores101-main-nld-39-pe2-1 Nadat het 4 juli door het Congres werd aangeno... \nflores101-main-vie-80-ht-2 NaN \nflores101-main-vie-29-ht-1 NaN \nflores101-main-nld-91-ht-1 NaN \nflores101-main-vie-42-pe2-2 Các thành viên trong nhóm ảo thường là điểm ti... \nflores101-main-ukr-44-ht-2 NaN \n\n tgt_text \\\nunit_id \nflores101-main-ukr-66-pe2-3 Рисами, які визначають субкультуру як відмінну... \nflores101-main-nld-9-pe2-4 Een cursus duurt normaal gesproken 2-5 dagen z... \nflores101-main-ara-41-pe2-3 على سبيل المثال، قد يقول الفرد أن السيارات تؤد... \nflores101-main-ita-3-pe1-3 I telescopi di ricerca più avanzati sono costi... \nflores101-main-ukr-98-pe1-4 Обшуки на безпекових блокпостах також стали на... \nflores101-main-vie-19-pe1-1 Đội tuyển Nam Phi đã đánh bại đội All Blacks (... \nflores101-main-ara-102-ht-4 تعرض بعض الرحلات البحرية زيارة برلين في ألماني... \nflores101-main-ukr-31-ht-2 Їх виготовляють і зараз, але, що важливіше, ві... 
\nflores101-main-ukr-9-pe2-5 Книги та журнали про виживання в дикій природі... \nflores101-main-tur-54-ht-1 Her iki adamın setteki her bir servisi kazanma... \nflores101-main-tur-53-ht-5 Hepsi kaza yerinden koşarak gelmişti. \nflores101-main-ukr-16-ht-3 Рух смичком угору зазвичай дає м’якший звук, т... \nflores101-main-tur-68-pe1-4 Sürücü önce Great Yarmouth'daki James Paget Ha... \nflores101-main-tur-42-pe1-4 Gerçek bir \"görünmez ekibin\" varlığı (Larson v... \nflores101-main-nld-39-pe2-1 Nadat het op 4 juli door het Congres werd aang... \nflores101-main-vie-80-ht-2 Các quan điểm của Aristotle về tất cả các lĩnh... \nflores101-main-vie-29-ht-1 Hai vệ tinh đã va chạm trong vũ trụ cách Trái ... \nflores101-main-nld-91-ht-1 Het gebruik van video-opnamen heeft geleid tot... \nflores101-main-vie-42-pe2-2 Các thành viên trong đội ngũ ảo thường là đầu ... \nflores101-main-ukr-44-ht-2 Визначення залежить від регіону: для деяких мі... \n\n aligned_edit \\\nunit_id \nflores101-main-ukr-66-pe2-3 REF: якістю , яка визначає субкультуру як *... \nflores101-main-nld-9-pe2-4 REF: een cursus zal normaal van 2-5 d... \nflores101-main-ara-41-pe2-3 REF: على سبيل المثال ، يمكن القول أن السي... \nflores101-main-ita-3-pe1-3 REF: i telescopi di ricerca più moderni sono... \nflores101-main-ukr-98-pe1-4 REF: обшуки на блокпостах безпеки та... \nflores101-main-vie-19-pe1-1 REF: *** ***** nam phi đã đánh bại *** all_bl... \nflores101-main-ara-102-ht-4 NaN \nflores101-main-ukr-31-ht-2 NaN \nflores101-main-ukr-9-pe2-5 REF: бібліотеки та журнали про виживання в ди... \nflores101-main-tur-54-ht-1 NaN \nflores101-main-tur-53-ht-5 NaN \nflores101-main-ukr-16-ht-3 NaN \nflores101-main-tur-68-pe1-4 REF: ****** başlangıçta great yarmouth'daki j... \nflores101-main-tur-42-pe1-4 REF: gerçek bir “görünmez ekibin” varlığı (... \nflores101-main-nld-39-pe2-1 REF: nadat het ** 4 juli door het congres wer... 
\nflores101-main-vie-80-ht-2 NaN \nflores101-main-vie-29-ht-1 NaN \nflores101-main-nld-91-ht-1 NaN \nflores101-main-vie-42-pe2-2 REF: các thành_viên trong nhóm ảo_thường l... \nflores101-main-ukr-44-ht-2 NaN \n\n lang_id \\\nunit_id \nflores101-main-ukr-66-pe2-3 ukr \nflores101-main-nld-9-pe2-4 nld \nflores101-main-ara-41-pe2-3 ara \nflores101-main-ita-3-pe1-3 ita \nflores101-main-ukr-98-pe1-4 ukr \nflores101-main-vie-19-pe1-1 vie \nflores101-main-ara-102-ht-4 ara \nflores101-main-ukr-31-ht-2 ukr \nflores101-main-ukr-9-pe2-5 ukr \nflores101-main-tur-54-ht-1 tur \nflores101-main-tur-53-ht-5 tur \nflores101-main-ukr-16-ht-3 ukr \nflores101-main-tur-68-pe1-4 tur \nflores101-main-tur-42-pe1-4 tur \nflores101-main-nld-39-pe2-1 nld \nflores101-main-vie-80-ht-2 vie \nflores101-main-vie-29-ht-1 vie \nflores101-main-nld-91-ht-1 nld \nflores101-main-vie-42-pe2-2 vie \nflores101-main-ukr-44-ht-2 ukr \n\n src_tokens \\\nunit_id \nflores101-main-ukr-66-pe2-3 ['The', 'qualities', 'that', 'determine', 'a',... \nflores101-main-nld-9-pe2-4 ['A', 'course', 'will', 'normally', 'be', 'fro... \nflores101-main-ara-41-pe2-3 ['For', 'example', ',', 'one', 'might', 'say',... \nflores101-main-ita-3-pe1-3 ['Most', 'modern', 'research', 'telescopes', '... \nflores101-main-ukr-98-pe1-4 ['Searches', 'at', 'security', 'checkpoints', ... \nflores101-main-vie-19-pe1-1 ['South', 'Africa', 'have', 'defeated', 'the',... \nflores101-main-ara-102-ht-4 ['Some', 'cruises', 'feature', 'Berlin', ',', ... \nflores101-main-ukr-31-ht-2 ['It', 'is', 'still', 'produced', 'today', ','... \nflores101-main-ukr-9-pe2-5 ['Books', 'and', 'magazines', 'dealing', 'with... \nflores101-main-tur-54-ht-1 ['Murray', 'lost', 'the', 'first', 'set', 'in'... \nflores101-main-tur-53-ht-5 ['They', 'all', 'ran', 'back', 'from', 'where'... \nflores101-main-ukr-16-ht-3 ['An', 'up', '-', 'bow', 'usually', 'generates... \nflores101-main-tur-68-pe1-4 ['He', 'was', 'initially', 'hospitalised', 'in... 
\nflores101-main-tur-42-pe1-4 ['The', 'presence', 'of', 'a', 'true', '“', 'i... \nflores101-main-nld-39-pe2-1 ['After', 'its', 'adoption', 'by', 'Congress',... \nflores101-main-vie-80-ht-2 ['Accepted', 'were', 'Aristotle', \"'s\", 'views... \nflores101-main-vie-29-ht-1 ['The', 'satellites', ',', 'both', 'of', 'whic... \nflores101-main-nld-91-ht-1 ['The', 'use', 'of', 'video', 'recording', 'ha... \nflores101-main-vie-42-pe2-2 ['Virtual', 'team', 'members', 'often', 'funct... \nflores101-main-ukr-44-ht-2 ['The', 'definition', 'has', 'geographic', 'va... \n\n src_annotations \\\nunit_id \nflores101-main-ukr-66-pe2-3 [{'lemma': 'the', 'upos': 'DET', 'feats': 'Def... \nflores101-main-nld-9-pe2-4 [{'lemma': 'a', 'upos': 'DET', 'feats': 'Defin... \nflores101-main-ara-41-pe2-3 [{'lemma': 'for', 'upos': 'ADP', 'feats': '', ... \nflores101-main-ita-3-pe1-3 [{'lemma': 'most', 'upos': 'ADJ', 'feats': 'De... \nflores101-main-ukr-98-pe1-4 [{'lemma': 'search', 'upos': 'NOUN', 'feats': ... \nflores101-main-vie-19-pe1-1 [{'lemma': 'South', 'upos': 'PROPN', 'feats': ... \nflores101-main-ara-102-ht-4 [{'lemma': 'some', 'upos': 'DET', 'feats': '',... \nflores101-main-ukr-31-ht-2 [{'lemma': 'it', 'upos': 'PRON', 'feats': 'Cas... \nflores101-main-ukr-9-pe2-5 [{'lemma': 'book', 'upos': 'NOUN', 'feats': 'N... \nflores101-main-tur-54-ht-1 [{'lemma': 'Murray', 'upos': 'PROPN', 'feats':... \nflores101-main-tur-53-ht-5 [{'lemma': 'they', 'upos': 'PRON', 'feats': 'C... \nflores101-main-ukr-16-ht-3 [{'lemma': 'a', 'upos': 'DET', 'feats': 'Defin... \nflores101-main-tur-68-pe1-4 [{'lemma': 'he', 'upos': 'PRON', 'feats': 'Cas... \nflores101-main-tur-42-pe1-4 [{'lemma': 'the', 'upos': 'DET', 'feats': 'Def... \nflores101-main-nld-39-pe2-1 [{'lemma': 'after', 'upos': 'ADP', 'feats': ''... \nflores101-main-vie-80-ht-2 [{'lemma': 'accept', 'upos': 'VERB', 'feats': ... \nflores101-main-vie-29-ht-1 [{'lemma': 'the', 'upos': 'DET', 'feats': 'Def... 
\nflores101-main-nld-91-ht-1 [{'lemma': 'the', 'upos': 'DET', 'feats': 'Def... \nflores101-main-vie-42-pe2-2 [{'lemma': 'virtual', 'upos': 'ADJ', 'feats': ... \nflores101-main-ukr-44-ht-2 [{'lemma': 'the', 'upos': 'DET', 'feats': 'Def... \n\n mt_tokens \\\nunit_id \nflores101-main-ukr-66-pe2-3 ['Якістю', ',', 'яка', 'визначає', 'субкультур... \nflores101-main-nld-9-pe2-4 ['Een', 'cursus', 'zal', 'normaal', 'van', '2-... \nflores101-main-ara-41-pe2-3 ['على', 'سبيل', 'المثال', '،', 'يمكن', 'القول'... \nflores101-main-ita-3-pe1-3 ['I', 'telescopi', 'di', 'ricerca', 'più', 'mo... \nflores101-main-ukr-98-pe1-4 ['Обшуки', 'на', 'блокпостах', 'безпеки', 'так... \nflores101-main-vie-19-pe1-1 ['Nam', 'Phi', 'đã', 'đánh', 'bại', 'All Black... \nflores101-main-ara-102-ht-4 NaN \nflores101-main-ukr-31-ht-2 NaN \nflores101-main-ukr-9-pe2-5 ['Бібліотеки', 'та', 'журнали', 'про', 'вижива... \nflores101-main-tur-54-ht-1 NaN \nflores101-main-tur-53-ht-5 NaN \nflores101-main-ukr-16-ht-3 NaN \nflores101-main-tur-68-pe1-4 ['Başlangıçta', 'Great', \"Yarmouth'daki\", 'Jam... \nflores101-main-tur-42-pe1-4 ['Gerçek', 'bir', '“görünmez', 'ekibin”', 'var... \nflores101-main-nld-39-pe2-1 ['Nadat', 'het', '4', 'juli', 'door', 'het', '... \nflores101-main-vie-80-ht-2 NaN \nflores101-main-vie-29-ht-1 NaN \nflores101-main-nld-91-ht-1 NaN \nflores101-main-vie-42-pe2-2 ['Các', 'thành viên', 'trong', 'nhóm', 'ảo thư... \nflores101-main-ukr-44-ht-2 NaN \n\n mt_annotations \\\nunit_id \nflores101-main-ukr-66-pe2-3 [{'lemma': 'якість', 'upos': 'NOUN', 'feats': ... \nflores101-main-nld-9-pe2-4 [{'lemma': 'een', 'upos': 'DET', 'feats': 'Def... \nflores101-main-ara-41-pe2-3 [{'lemma': 'عَلَى', 'upos': 'ADP', 'feats': 'A... \nflores101-main-ita-3-pe1-3 [{'lemma': 'il', 'upos': 'DET', 'feats': 'Defi... \nflores101-main-ukr-98-pe1-4 [{'lemma': 'обшук', 'upos': 'NOUN', 'feats': '... \nflores101-main-vie-19-pe1-1 [{'lemma': 'Nam', 'upos': 'NOUN', 'feats': '',... 
\nflores101-main-ara-102-ht-4 NaN \nflores101-main-ukr-31-ht-2 NaN \nflores101-main-ukr-9-pe2-5 [{'lemma': 'бібліотека', 'upos': 'NOUN', 'feat... \nflores101-main-tur-54-ht-1 NaN \nflores101-main-tur-53-ht-5 NaN \nflores101-main-ukr-16-ht-3 NaN \nflores101-main-tur-68-pe1-4 [{'lemma': 'başlangıç', 'upos': 'NOUN', 'feats... \nflores101-main-tur-42-pe1-4 [{'lemma': 'gerçek', 'upos': 'ADJ', 'feats': '... \nflores101-main-nld-39-pe2-1 [{'lemma': 'nadat', 'upos': 'SCONJ', 'feats': ... \nflores101-main-vie-80-ht-2 NaN \nflores101-main-vie-29-ht-1 NaN \nflores101-main-nld-91-ht-1 NaN \nflores101-main-vie-42-pe2-2 [{'lemma': 'Các', 'upos': 'DET', 'feats': '', ... \nflores101-main-ukr-44-ht-2 NaN \n\n tgt_tokens \\\nunit_id \nflores101-main-ukr-66-pe2-3 ['Рисами', ',', 'які', 'визначають', 'субкульт... \nflores101-main-nld-9-pe2-4 ['Een', 'cursus', 'duurt', 'normaal', 'gesprok... \nflores101-main-ara-41-pe2-3 ['على', 'سبيل', 'المثال', '،', 'قد', 'يقول', '... \nflores101-main-ita-3-pe1-3 ['I', 'telescopi', 'di', 'ricerca', 'più', 'av... \nflores101-main-ukr-98-pe1-4 ['Обшуки', 'на', 'безпекових', 'блокпостах', '... \nflores101-main-vie-19-pe1-1 ['Đội', 'tuyển', 'Nam', 'Phi', 'đã', 'đánh', '... \nflores101-main-ara-102-ht-4 ['تعرض', 'بعض', 'الرحلات', 'البحرية', 'زيارة',... \nflores101-main-ukr-31-ht-2 ['Їх', 'виготовляють', 'і', 'зараз', ',', 'але... \nflores101-main-ukr-9-pe2-5 ['Книги', 'та', 'журнали', 'про', 'виживання',... \nflores101-main-tur-54-ht-1 ['Her', 'iki', 'adamın', 'setteki', 'her', 'bi... \nflores101-main-tur-53-ht-5 ['Hepsi', 'kaza', 'yerinden', 'koşarak', 'gelm... \nflores101-main-ukr-16-ht-3 ['Рух', 'смичком', 'угору', 'зазвичай', 'дає',... \nflores101-main-tur-68-pe1-4 ['Sürücü', 'önce', 'Great', \"Yarmouth'daki\", '... \nflores101-main-tur-42-pe1-4 ['Gerçek', 'bir', '\"görünmez', 'ekibin\"', 'var... \nflores101-main-nld-39-pe2-1 ['Nadat', 'het', 'op', '4', 'juli', 'door', 'h... 
\nflores101-main-vie-80-ht-2 ['Các', 'quan điểm', 'của', 'Aristotle', 'về',... \nflores101-main-vie-29-ht-1 ['Hai', 'vệ', 'tinh', 'đã', 'va', 'chạm', 'tro... \nflores101-main-nld-91-ht-1 ['Het', 'gebruik', 'van', 'video-opnamen', 'he... \nflores101-main-vie-42-pe2-2 ['Các', 'thành viên', 'trong', 'đội ngũ', 'ảo ... \nflores101-main-ukr-44-ht-2 ['Визначення', 'залежить', 'від', 'регіону', '... \n\n ... doc_id time_s time_m time_h \\\nunit_id ... \nflores101-main-ukr-66-pe2-3 ... 66 86.563 1.4427 0.0240 \nflores101-main-nld-9-pe2-4 ... 9 19.836 0.3306 0.0055 \nflores101-main-ara-41-pe2-3 ... 41 52.346 0.8724 0.0145 \nflores101-main-ita-3-pe1-3 ... 3 137.284 2.2881 0.0381 \nflores101-main-ukr-98-pe1-4 ... 98 28.684 0.4781 0.0080 \nflores101-main-vie-19-pe1-1 ... 19 182.751 3.0458 0.0508 \nflores101-main-ara-102-ht-4 ... 102 74.626 1.2438 0.0207 \nflores101-main-ukr-31-ht-2 ... 31 91.587 1.5264 0.0254 \nflores101-main-ukr-9-pe2-5 ... 9 45.923 0.7654 0.0128 \nflores101-main-tur-54-ht-1 ... 54 142.789 2.3798 0.0397 \nflores101-main-tur-53-ht-5 ... 53 26.832 0.4472 0.0075 \nflores101-main-ukr-16-ht-3 ... 16 97.448 1.6241 0.0271 \nflores101-main-tur-68-pe1-4 ... 68 19.456 0.3243 0.0054 \nflores101-main-tur-42-pe1-4 ... 42 30.930 0.5155 0.0086 \nflores101-main-nld-39-pe2-1 ... 39 73.994 1.2332 0.0206 \nflores101-main-vie-80-ht-2 ... 80 42.489 0.7081 0.0118 \nflores101-main-vie-29-ht-1 ... 29 178.737 2.9789 0.0496 \nflores101-main-nld-91-ht-1 ... 91 35.700 0.5950 0.0099 \nflores101-main-vie-42-pe2-2 ... 42 101.728 1.6955 0.0283 \nflores101-main-ukr-44-ht-2 ... 
44 226.385 3.7731 0.0629 \n\n time_per_char time_per_word key_per_char \\\nunit_id \nflores101-main-ukr-66-pe2-3 0.5549 4.1220 1.9872 \nflores101-main-nld-9-pe2-4 0.1681 0.9016 0.6102 \nflores101-main-ara-41-pe2-3 0.5690 3.2716 0.4022 \nflores101-main-ita-3-pe1-3 1.2480 9.8060 1.3182 \nflores101-main-ukr-98-pe1-4 0.2758 1.7928 1.0096 \nflores101-main-vie-19-pe1-1 1.2265 7.3100 1.6174 \nflores101-main-ara-102-ht-4 0.3969 1.9638 1.1330 \nflores101-main-ukr-31-ht-2 0.7386 4.5794 1.5161 \nflores101-main-ukr-9-pe2-5 0.4064 2.7014 2.2478 \nflores101-main-tur-54-ht-1 1.5354 7.1394 2.3226 \nflores101-main-tur-53-ht-5 0.4879 2.6832 0.7273 \nflores101-main-ukr-16-ht-3 1.0592 6.4965 1.3913 \nflores101-main-tur-68-pe1-4 0.2560 1.6213 0.7500 \nflores101-main-tur-42-pe1-4 0.2621 1.4729 0.5339 \nflores101-main-nld-39-pe2-1 0.3474 1.9472 0.0329 \nflores101-main-vie-80-ht-2 0.5311 3.8626 2.6375 \nflores101-main-vie-29-ht-1 1.1606 7.1495 2.6299 \nflores101-main-nld-91-ht-1 0.2364 1.6227 1.2914 \nflores101-main-vie-42-pe2-2 1.0708 6.7819 1.8421 \nflores101-main-ukr-44-ht-2 1.7967 10.7802 2.4762 \n\n words_per_hour words_per_minute \\\nunit_id \nflores101-main-ukr-66-pe2-3 873.3524 14.5559 \nflores101-main-nld-9-pe2-4 3992.7405 66.5457 \nflores101-main-ara-41-pe2-3 1100.3706 18.3395 \nflores101-main-ita-3-pe1-3 367.1222 6.1187 \nflores101-main-ukr-98-pe1-4 2008.0881 33.4681 \nflores101-main-vie-19-pe1-1 492.4734 8.2079 \nflores101-main-ara-102-ht-4 1833.1413 30.5524 \nflores101-main-ukr-31-ht-2 786.1378 13.1023 \nflores101-main-ukr-9-pe2-5 1332.6655 22.2111 \nflores101-main-tur-54-ht-1 504.2405 8.4040 \nflores101-main-tur-53-ht-5 1341.6816 22.3614 \nflores101-main-ukr-16-ht-3 554.1417 9.2357 \nflores101-main-tur-68-pe1-4 2220.3947 37.0066 \nflores101-main-tur-42-pe1-4 2444.2289 40.7371 \nflores101-main-nld-39-pe2-1 1848.7986 30.8133 \nflores101-main-vie-80-ht-2 932.0059 15.5334 \nflores101-main-vie-29-ht-1 503.5331 8.3922 \nflores101-main-nld-91-ht-1 2218.4874 36.9748 
\nflores101-main-vie-42-pe2-2 530.8273 8.8471 \nflores101-main-ukr-44-ht-2 333.9444 5.5657 \n\n per_subject_visit_order \nunit_id \nflores101-main-ukr-66-pe2-3 284 \nflores101-main-nld-9-pe2-4 388 \nflores101-main-ara-41-pe2-3 181 \nflores101-main-ita-3-pe1-3 424 \nflores101-main-ukr-98-pe1-4 421 \nflores101-main-vie-19-pe1-1 74 \nflores101-main-ara-102-ht-4 20 \nflores101-main-ukr-31-ht-2 132 \nflores101-main-ukr-9-pe2-5 385 \nflores101-main-tur-54-ht-1 235 \nflores101-main-tur-53-ht-5 234 \nflores101-main-ukr-16-ht-3 67 \nflores101-main-tur-68-pe1-4 294 \nflores101-main-tur-42-pe1-4 187 \nflores101-main-nld-39-pe2-1 93 \nflores101-main-vie-80-ht-2 429 \nflores101-main-vie-29-ht-1 119 \nflores101-main-nld-91-ht-1 394 \nflores101-main-vie-42-pe2-2 178 \nflores101-main-ukr-44-ht-2 194 \n\n[20 rows x 66 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
src_textmt_texttgt_textaligned_editlang_idsrc_tokenssrc_annotationsmt_tokensmt_annotationstgt_tokens...doc_idtime_stime_mtime_htime_per_chartime_per_wordkey_per_charwords_per_hourwords_per_minuteper_subject_visit_order
unit_id
flores101-main-ukr-66-pe2-3The qualities that determine a subculture as d...Якістю, яка визначає субкультуру як індивідуал...Рисами, які визначають субкультуру як відмінну...REF: якістю , яка визначає субкультуру як *...ukr['The', 'qualities', 'that', 'determine', 'a',...[{'lemma': 'the', 'upos': 'DET', 'feats': 'Def...['Якістю', ',', 'яка', 'визначає', 'субкультур...[{'lemma': 'якість', 'upos': 'NOUN', 'feats': ...['Рисами', ',', 'які', 'визначають', 'субкульт......6686.5631.44270.02400.55494.12201.9872873.352414.5559284
flores101-main-nld-9-pe2-4A course will normally be from 2-5 days and wi...Een cursus zal normaal van 2-5 dagen zijn en r...Een cursus duurt normaal gesproken 2-5 dagen z...REF: een cursus zal normaal van 2-5 d...nld['A', 'course', 'will', 'normally', 'be', 'fro...[{'lemma': 'a', 'upos': 'DET', 'feats': 'Defin...['Een', 'cursus', 'zal', 'normaal', 'van', '2-...[{'lemma': 'een', 'upos': 'DET', 'feats': 'Def...['Een', 'cursus', 'duurt', 'normaal', 'gesprok......919.8360.33060.00550.16810.90160.61023992.740566.5457388
flores101-main-ara-41-pe2-3For example, one might say that the motor car ...على سبيل المثال، يمكن القول أن السيارة النارية...على سبيل المثال، قد يقول الفرد أن السيارات تؤد...REF: على سبيل المثال ، يمكن القول أن السي...ara['For', 'example', ',', 'one', 'might', 'say',...[{'lemma': 'for', 'upos': 'ADP', 'feats': '', ...['على', 'سبيل', 'المثال', '،', 'يمكن', 'القول'...[{'lemma': 'عَلَى', 'upos': 'ADP', 'feats': 'A...['على', 'سبيل', 'المثال', '،', 'قد', 'يقول', '......4152.3460.87240.01450.56903.27160.40221100.370618.3395181
flores101-main-ita-3-pe1-3Most modern research telescopes are enormous f...I telescopi di ricerca più moderni sono enormi...I telescopi di ricerca più avanzati sono costi...REF: i telescopi di ricerca più moderni sono...ita['Most', 'modern', 'research', 'telescopes', '...[{'lemma': 'most', 'upos': 'ADJ', 'feats': 'De...['I', 'telescopi', 'di', 'ricerca', 'più', 'mo...[{'lemma': 'il', 'upos': 'DET', 'feats': 'Defi...['I', 'telescopi', 'di', 'ricerca', 'più', 'av......3137.2842.28810.03811.24809.80601.3182367.12226.1187424
flores101-main-ukr-98-pe1-4Searches at security checkpoints have also bec...Обшуки на блокпостах безпеки також стали набаг...Обшуки на безпекових блокпостах також стали на...REF: обшуки на блокпостах безпеки та...ukr['Searches', 'at', 'security', 'checkpoints', ...[{'lemma': 'search', 'upos': 'NOUN', 'feats': ...['Обшуки', 'на', 'блокпостах', 'безпеки', 'так...[{'lemma': 'обшук', 'upos': 'NOUN', 'feats': '...['Обшуки', 'на', 'безпекових', 'блокпостах', '......9828.6840.47810.00800.27581.79281.00962008.088133.4681421
flores101-main-vie-19-pe1-1South Africa have defeated the All Blacks (New...Nam Phi đã đánh bại All Blacks (New Zealand) t...Đội tuyển Nam Phi đã đánh bại đội All Blacks (...REF: *** ***** nam phi đã đánh bại *** all_bl...vie['South', 'Africa', 'have', 'defeated', 'the',...[{'lemma': 'South', 'upos': 'PROPN', 'feats': ...['Nam', 'Phi', 'đã', 'đánh', 'bại', 'All Black...[{'lemma': 'Nam', 'upos': 'NOUN', 'feats': '',...['Đội', 'tuyển', 'Nam', 'Phi', 'đã', 'đánh', '......19182.7513.04580.05081.22657.31001.6174492.47348.207974
flores101-main-ara-102-ht-4Some cruises feature Berlin, Germany in the br...NaNتعرض بعض الرحلات البحرية زيارة برلين في ألماني...NaNara['Some', 'cruises', 'feature', 'Berlin', ',', ...[{'lemma': 'some', 'upos': 'DET', 'feats': '',...NaNNaN['تعرض', 'بعض', 'الرحلات', 'البحرية', 'زيارة',......10274.6261.24380.02070.39691.96381.13301833.141330.552420
flores101-main-ukr-31-ht-2It is still produced today, but more important...NaNЇх виготовляють і зараз, але, що важливіше, ві...NaNukr['It', 'is', 'still', 'produced', 'today', ','...[{'lemma': 'it', 'upos': 'PRON', 'feats': 'Cas...NaNNaN['Їх', 'виготовляють', 'і', 'зараз', ',', 'але......3191.5871.52640.02540.73864.57941.5161786.137813.1023132
flores101-main-ukr-9-pe2-5Books and magazines dealing with wilderness su...Бібліотеки та журнали про виживання в дикій пр...Книги та журнали про виживання в дикій природі...REF: бібліотеки та журнали про виживання в ди...ukr['Books', 'and', 'magazines', 'dealing', 'with...[{'lemma': 'book', 'upos': 'NOUN', 'feats': 'N...['Бібліотеки', 'та', 'журнали', 'про', 'вижива...[{'lemma': 'бібліотека', 'upos': 'NOUN', 'feat...['Книги', 'та', 'журнали', 'про', 'виживання',......945.9230.76540.01280.40642.70142.24781332.665522.2111385
flores101-main-tur-54-ht-1Murray lost the first set in a tie break after...NaNHer iki adamın setteki her bir servisi kazanma...NaNtur['Murray', 'lost', 'the', 'first', 'set', 'in'...[{'lemma': 'Murray', 'upos': 'PROPN', 'feats':...NaNNaN['Her', 'iki', 'adamın', 'setteki', 'her', 'bi......54142.7892.37980.03971.53547.13942.3226504.24058.4040235
flores101-main-tur-53-ht-5They all ran back from where the accident had ...NaNHepsi kaza yerinden koşarak gelmişti.NaNtur['They', 'all', 'ran', 'back', 'from', 'where'...[{'lemma': 'they', 'upos': 'PRON', 'feats': 'C...NaNNaN['Hepsi', 'kaza', 'yerinden', 'koşarak', 'gelm......5326.8320.44720.00750.48792.68320.72731341.681622.3614234
flores101-main-ukr-16-ht-3An up-bow usually generates a softer sound, wh...NaNРух смичком угору зазвичай дає м’якший звук, т...NaNukr['An', 'up', '-', 'bow', 'usually', 'generates...[{'lemma': 'a', 'upos': 'DET', 'feats': 'Defin...NaNNaN['Рух', 'смичком', 'угору', 'зазвичай', 'дає',......1697.4481.62410.02711.05926.49651.3913554.14179.235767
flores101-main-tur-68-pe1-4He was initially hospitalised in the James Pag...Başlangıçta Great Yarmouth'daki James Paget Ha...Sürücü önce Great Yarmouth'daki James Paget Ha...REF: ****** başlangıçta great yarmouth'daki j...tur['He', 'was', 'initially', 'hospitalised', 'in...[{'lemma': 'he', 'upos': 'PRON', 'feats': 'Cas...['Başlangıçta', 'Great', \"Yarmouth'daki\", 'Jam...[{'lemma': 'başlangıç', 'upos': 'NOUN', 'feats...['Sürücü', 'önce', 'Great', \"Yarmouth'daki\", '......6819.4560.32430.00540.25601.62130.75002220.394737.0066294
flores101-main-tur-42-pe1-4The presence of a true “invisible team” (Larso...Gerçek bir “görünmez ekibin” varlığı (Larson v...Gerçek bir \"görünmez ekibin\" varlığı (Larson v...REF: gerçek bir “görünmez ekibin” varlığı (...tur['The', 'presence', 'of', 'a', 'true', '“', 'i...[{'lemma': 'the', 'upos': 'DET', 'feats': 'Def...['Gerçek', 'bir', '“görünmez', 'ekibin”', 'var...[{'lemma': 'gerçek', 'upos': 'ADJ', 'feats': '...['Gerçek', 'bir', '\"görünmez', 'ekibin\"', 'var......4230.9300.51550.00860.26211.47290.53392444.228940.7371187
flores101-main-nld-39-pe2-1After its adoption by Congress on July 4, a ha...Nadat het 4 juli door het Congres werd aangeno...Nadat het op 4 juli door het Congres werd aang...REF: nadat het ** 4 juli door het congres wer...nld['After', 'its', 'adoption', 'by', 'Congress',...[{'lemma': 'after', 'upos': 'ADP', 'feats': ''...['Nadat', 'het', '4', 'juli', 'door', 'het', '...[{'lemma': 'nadat', 'upos': 'SCONJ', 'feats': ...['Nadat', 'het', 'op', '4', 'juli', 'door', 'h......3973.9941.23320.02060.34741.94720.03291848.798630.813393
flores101-main-vie-80-ht-2Accepted were Aristotle's views on all matters...NaNCác quan điểm của Aristotle về tất cả các lĩnh...NaNvie['Accepted', 'were', 'Aristotle', \"'s\", 'views...[{'lemma': 'accept', 'upos': 'VERB', 'feats': ...NaNNaN['Các', 'quan điểm', 'của', 'Aristotle', 'về',......8042.4890.70810.01180.53113.86262.6375932.005915.5334429
flores101-main-vie-29-ht-1The satellites, both of which weighed in exces...NaNHai vệ tinh đã va chạm trong vũ trụ cách Trái ...NaNvie['The', 'satellites', ',', 'both', 'of', 'whic...[{'lemma': 'the', 'upos': 'DET', 'feats': 'Def...NaNNaN['Hai', 'vệ', 'tinh', 'đã', 'va', 'chạm', 'tro......29178.7372.97890.04961.16067.14952.6299503.53318.3922119
flores101-main-nld-91-ht-1The use of video recording has led to importan...NaNHet gebruik van video-opnamen heeft geleid tot...NaNnld['The', 'use', 'of', 'video', 'recording', 'ha...[{'lemma': 'the', 'upos': 'DET', 'feats': 'Def...NaNNaN['Het', 'gebruik', 'van', 'video-opnamen', 'he......9135.7000.59500.00990.23641.62271.29142218.487436.9748394
flores101-main-vie-42-pe2-2Virtual team members often function as the poi...Các thành viên trong nhóm ảo thường là điểm ti...Các thành viên trong đội ngũ ảo thường là đầu ...REF: các thành_viên trong nhóm ảo_thường l...vie['Virtual', 'team', 'members', 'often', 'funct...[{'lemma': 'virtual', 'upos': 'ADJ', 'feats': ...['Các', 'thành viên', 'trong', 'nhóm', 'ảo thư...[{'lemma': 'Các', 'upos': 'DET', 'feats': '', ...['Các', 'thành viên', 'trong', 'đội ngũ', 'ảo ......42101.7281.69550.02831.07086.78191.8421530.82738.8471178
flores101-main-ukr-44-ht-2The definition has geographic variations, wher...NaNВизначення залежить від регіону: для деяких мі...NaNukr['The', 'definition', 'has', 'geographic', 'va...[{'lemma': 'the', 'upos': 'DET', 'feats': 'Def...NaNNaN['Визначення', 'залежить', 'від', 'регіону', '......44226.3853.77310.06291.796710.78022.4762333.94445.5657194
\n

20 rows × 66 columns

\n
" + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame()\n", + "for lang in ['ukr', 'ara', 'ita', 'nld', 'tur', 'vie']:\n", + " if lang == 'ita':\n", + " lang_df_main_texts = pd.read_csv(MERGED_FOLDER / f'{lang}_t1_t4_t5_main_texts.tsv', sep='\\t', index_col=0)\n", + " lang_df_main = pd.read_csv(MERGED_FOLDER / f'{lang}_t1_t4_t5_main.tsv', sep='\\t', index_col=0)\n", + " else:\n", + " lang_df_main_texts = pd.read_csv(MERGED_FOLDER / f'{lang}_t1_t2_t3_main_texts.tsv', sep='\\t', index_col=0)\n", + " lang_df_main = pd.read_csv(MERGED_FOLDER / f'{lang}_t1_t2_t3_main.tsv', sep='\\t', index_col=0)\n", + "\n", + " lang_df = pd.merge(lang_df_main_texts, lang_df_main, how='inner', on=['unit_id', 'lang_id'])\n", + "\n", + " df = pd.concat([df, lang_df], ignore_index=False)\n", + "\n", + "df.sample(20)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-27T13:17:13.399083Z", + "start_time": "2023-08-27T13:17:11.128478Z" + } + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 79%|███████▉ | 4098/5160 [23:06<07:44, 2.29it/s] " + ] + } + ], + "source": [ + "# process data: read files, read lists to python lists, read alignments \n", + "\n", + "df_overlap = pd.DataFrame()\n", + "\n", + "for _id, x in tqdm(df.iterrows(), total=len(df)):\n", + " pe_tokens = ast.literal_eval(x['tgt_tokens'])\n", + " mt_tokens = ast.literal_eval(x['mt_tokens'])\n", + " mt_tbd_qe = ast.literal_eval(x['mt_tbd_qe'])\n", + " mt_wmt22_qe = ast.literal_eval(x['mt_wmt22_qe'])[:-1] # as omission rule right\n", + "\n", + " mt_pe_alignments_raw = ast.literal_eval(x['mt_pe_tbd_qe_alignments'])\n", + " mt_pe_alignments_dict = defaultdict(list)\n", + "\n", + " for k, v, score in mt_pe_alignments_raw:\n", + " if k is not 
None:\n", + " mt_pe_alignments_dict[k].append(v)\n", + "\n", + " for i, mt_tok in enumerate(mt_tokens):\n", + "\n", + " paired_pe_tok_i = mt_pe_alignments_dict[i][0] if mt_pe_alignments_dict[i] else None # SUB have to be paired with one PE token\n", + " if paired_pe_tok_i is None:\n", + " continue\n", + "\n", + " tbd_qe_tags = mt_tbd_qe[i]\n", + "\n", + " for tbd_qe_tag in tbd_qe_tags:\n", + " _df_tok_stats = pd.DataFrame([{\n", + " 'unit_id': _id,\n", + " 'lang_id': x['lang_id'],\n", + " 'mt_tok': mt_tok,\n", + " 'pe_tok': pe_tokens[paired_pe_tok_i],\n", + " 'mt_tbd_qe': tbd_qe_tag,\n", + " 'mt_wmt22_qe': mt_wmt22_qe[i],\n", + " }])\n", + " df_overlap = pd.concat([df_overlap, _df_tok_stats], ignore_index=True)\n", + "\n", + "df_overlap" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-27T13:52:55.913558Z", + "start_time": "2023-08-27T13:17:51.561633Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Check overlap with wmt22" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 9, + "outputs": [], + "source": [ + "print('Note: BAD-DEL and BAD-SHF is overlapping with other cats')\n", + "pd.crosstab(df_overlap['mt_tbd_qe'], [df_overlap['lang_id'], df_overlap['mt_wmt22_qe']], rownames=['mt_tbd_qe'], colnames=['lang_id', 'mt_wmt22_qe'])" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 10, + "outputs": [], + "source": [ + "df_overlap[(df_overlap['mt_tbd_qe'] == 'OK') & (df_overlap['mt_wmt22_qe'] == 'BAD')].sample(10)" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 11, + "outputs": [], + "source": [ + "df_overlap[(df_overlap['mt_tbd_qe'] == 'BAD-EXP') & (df_overlap['mt_wmt22_qe'] == 'OK')].sample(5)" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 12, + "outputs": [], + "source": [ + "df_overlap" + ], + "metadata": { + 
"collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 13, + "outputs": [], + "source": [ + "pd.crosstab(df_overlap['mt_tbd_qe'], df_overlap['mt_wmt22_qe']).T" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 14, + "outputs": [], + "source": [ + "df_overlap" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 15, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 4))\n", + "sns.countplot(\n", + " df_overlap,\n", + " x='mt_tbd_qe',\n", + " hue='mt_wmt22_qe',\n", + ")\n", + "plt.show()" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Analyse BAD-SUB" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 162, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 5160/5160 [00:58<00:00, 88.93it/s] \n" + ] + }, + { + "data": { + "text/plain": " unit_id lang_id mt_tok pe_tok mt_pos \\\n0 flores101-main-ukr-100-pe1-1 ukr при від ADP \n1 flores101-main-ukr-100-pe1-1 ukr вступі повернення NOUN \n2 flores101-main-ukr-100-pe1-1 ukr фази фаза NOUN \n3 flores101-main-ukr-100-pe1-1 ukr бути проходити AUX \n4 flores101-main-ukr-100-pe1-3 ukr Повернувшись Проживши VERB \n... ... ... ... ... ... \n14804 flores101-main-vie-48-pe1-3 vie bằng trên ADP \n14805 flores101-main-vie-48-pe1-3 vie vận chuyển tàu thuyền VERB \n14806 flores101-main-vie-48-pe1-3 vie cuộc đoàn NOUN \n14807 flores101-main-vie-48-pe1-3 vie thoại truyền NOUN \n14808 flores101-main-vie-48-pe1-4 vie điện thoại thông NOUN \n\n pe_pos same_word same_pos same_lemma same_morf same_deprel \n0 ADP False True False False True \n1 NOUN False True False False False \n2 NOUN False True True False False \n3 VERB False False False True False \n4 VERB False True False True True \n... ... ... ... ... ... ... 
\n14804 ADP False True False True True \n14805 NOUN False False False True False \n14806 NOUN False True False True True \n14807 VERB False False False True True \n14808 ADJ False False False True False \n\n[14809 rows x 11 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
unit_idlang_idmt_tokpe_tokmt_pospe_possame_wordsame_possame_lemmasame_morfsame_deprel
0flores101-main-ukr-100-pe1-1ukrпривідADPADPFalseTrueFalseFalseTrue
1flores101-main-ukr-100-pe1-1ukrвступіповерненняNOUNNOUNFalseTrueFalseFalseFalse
2flores101-main-ukr-100-pe1-1ukrфазифазаNOUNNOUNFalseTrueTrueFalseFalse
3flores101-main-ukr-100-pe1-1ukrбутипроходитиAUXVERBFalseFalseFalseTrueFalse
4flores101-main-ukr-100-pe1-3ukrПовернувшисьПрожившиVERBVERBFalseTrueFalseTrueTrue
....................................
14804flores101-main-vie-48-pe1-3viebằngtrênADPADPFalseTrueFalseTrueTrue
14805flores101-main-vie-48-pe1-3vievận chuyểntàu thuyềnVERBNOUNFalseFalseFalseTrueFalse
14806flores101-main-vie-48-pe1-3viecuộcđoànNOUNNOUNFalseTrueFalseTrueTrue
14807flores101-main-vie-48-pe1-3viethoạitruyềnNOUNVERBFalseFalseFalseTrueTrue
14808flores101-main-vie-48-pe1-4vieđiện thoạithôngNOUNADJFalseFalseFalseTrueFalse
\n

14809 rows × 11 columns

\n
" + }, + "execution_count": 162, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# collect all BAD-SUB: read files, filter sents with BAD-SUB token, process to python lists, read alignments\n", + "df_stats = pd.DataFrame()\n", + "\n", + "for _id, x in tqdm(df.iterrows(), total=len(df)):\n", + " pe_tokens = ast.literal_eval(x['tgt_tokens'])\n", + " pe_annotations = ast.literal_eval(x['tgt_annotations'])\n", + " mt_annotations = ast.literal_eval(x['mt_annotations'])\n", + " mt_tokens = ast.literal_eval(x['mt_tokens'])\n", + " mt_tbd_qe = ast.literal_eval(x['mt_tbd_qe'])\n", + " mt_pe_alignments_raw = ast.literal_eval(x['mt_pe_tbd_qe_alignments'])\n", + " mt_pe_alignments_dict = defaultdict(list)\n", + "\n", + " for k, v, score in mt_pe_alignments_raw:\n", + " if k is not None:\n", + " mt_pe_alignments_dict[k].append(v)\n", + "\n", + " for i, mt_tok in enumerate(mt_tokens):\n", + " if 'BAD-SUB' in mt_tbd_qe[i]:\n", + " paired_pe_tok_i = mt_pe_alignments_dict[i][0] if mt_pe_alignments_dict[i] else None # SUB have to be paired with one PE token\n", + " if paired_pe_tok_i is None:\n", + " continue\n", + "\n", + " _df_tok_stats = pd.DataFrame([{\n", + " 'unit_id': _id,\n", + " 'lang_id': x['lang_id'],\n", + " 'mt_tok': mt_tok,\n", + " 'pe_tok': pe_tokens[paired_pe_tok_i],\n", + " 'mt_pos': mt_annotations[i]['upos'],\n", + " 'pe_pos': pe_annotations[paired_pe_tok_i]['upos'],\n", + " 'same_word': mt_tok.lower() == pe_tokens[paired_pe_tok_i].lower(),\n", + " 'same_pos': mt_annotations[i]['upos'] == pe_annotations[paired_pe_tok_i]['upos'],\n", + " 'same_lemma': mt_annotations[i]['lemma'] == pe_annotations[paired_pe_tok_i]['lemma'],\n", + " 'same_morf': mt_annotations[i]['feats'] == pe_annotations[paired_pe_tok_i]['feats'],\n", + " 'same_deprel': mt_annotations[i]['deprel'] == pe_annotations[paired_pe_tok_i]['deprel'],\n", + " }])\n", + " df_stats = pd.concat([df_stats, _df_tok_stats], ignore_index=True)\n", + "\n", + "\n", + "df_stats = 
df_stats.astype({'same_word': bool, 'same_pos': bool, 'same_lemma': bool, 'same_morf': bool, 'same_deprel': bool})\n", + "\n", + "df_stats" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-07-18T21:01:51.313256Z", + "start_time": "2023-07-18T21:00:53.213021Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "---" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 413, + "outputs": [ + { + "data": { + "text/plain": " total_sub diff_pos same_pos \\\nunit_id lang_id \nflores101-main-ara-1-pe1-1 ara 3 1 2 \nflores101-main-ara-1-pe1-4 ara 2 2 0 \nflores101-main-ara-1-pe2-2 ara 5 3 2 \nflores101-main-ara-1-pe2-3 ara 1 0 1 \nflores101-main-ara-1-pe2-4 ara 3 2 1 \n... ... ... ... \nflores101-main-vie-99-pe1-4 vie 1 1 0 \nflores101-main-vie-99-pe2-1 vie 3 0 3 \nflores101-main-vie-99-pe2-2 vie 3 1 2 \nflores101-main-vie-99-pe2-3 vie 7 1 6 \nflores101-main-vie-99-pe2-4 vie 4 0 4 \n\n diff_pos_percent \nunit_id lang_id \nflores101-main-ara-1-pe1-1 ara 0.333333 \nflores101-main-ara-1-pe1-4 ara 1.000000 \nflores101-main-ara-1-pe2-2 ara 0.600000 \nflores101-main-ara-1-pe2-3 ara 0.000000 \nflores101-main-ara-1-pe2-4 ara 0.666667 \n... ... \nflores101-main-vie-99-pe1-4 vie 1.000000 \nflores101-main-vie-99-pe2-1 vie 0.000000 \nflores101-main-vie-99-pe2-2 vie 0.333333 \nflores101-main-vie-99-pe2-3 vie 0.142857 \nflores101-main-vie-99-pe2-4 vie 0.000000 \n\n[4088 rows x 4 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
total_subdiff_possame_posdiff_pos_percent
unit_idlang_id
flores101-main-ara-1-pe1-1ara3120.333333
flores101-main-ara-1-pe1-4ara2201.000000
flores101-main-ara-1-pe2-2ara5320.600000
flores101-main-ara-1-pe2-3ara1010.000000
flores101-main-ara-1-pe2-4ara3210.666667
..................
flores101-main-vie-99-pe1-4vie1101.000000
flores101-main-vie-99-pe2-1vie3030.000000
flores101-main-vie-99-pe2-2vie3120.333333
flores101-main-vie-99-pe2-3vie7160.142857
flores101-main-vie-99-pe2-4vie4040.000000
\n

4088 rows × 4 columns

\n
" + }, + "execution_count": 413, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# precalculate df for charts: count same_pos, diff_pos, total_sub\n", + "df_stats_ext = df_stats[['unit_id', 'lang_id', 'same_pos']].copy()\n", + "df_stats_ext['diff_pos'] = ~df_stats_ext['same_pos']\n", + "df_stats_ext['total_sub'] = 1\n", + "df_stats_ext_sum = df_stats_ext.groupby(['unit_id', 'lang_id'])[['total_sub', 'diff_pos', 'same_pos']].sum()\n", + "df_stats_ext_sum = df_stats_ext_sum[(df_stats_ext_sum['total_sub'] < 10) & (df_stats_ext_sum['diff_pos'] < 6)]\n", + "df_stats_ext_sum['diff_pos_percent'] = df_stats_ext_sum['diff_pos'] / df_stats_ext_sum['total_sub']\n", + "df_stats_ext_sum" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-07-19T10:23:35.738184Z", + "start_time": "2023-07-19T10:23:35.680165Z" + } + } + }, + { + "cell_type": "code", + "execution_count": 414, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "f, axes = plt.subplots(2, 3, figsize=(9, 6), sharex=True, sharey=True)\n", + "\n", + "for ax, lang in zip(axes.flat, df['lang_id'].unique()):\n", + "\n", + " sns.histplot(\n", + " df_stats_ext_sum.loc[(slice(None),lang), :],\n", + " x=\"diff_pos_percent\",\n", + " kde=True,\n", + " ax=ax,\n", + " )\n", + " ax.set_title(lang)\n", + " # ax.set_axis_off()\n", + "\n", + "ax.set(xlim=(0, 1))\n", + "f.suptitle(\"Percent of diff_pos of all BAD-SUB\", fontsize=12)\n", + "plt.show()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-07-19T10:23:59.005619Z", + "start_time": "2023-07-19T10:23:54.861742Z" + } + } + }, + { + "cell_type": "code", + "execution_count": 416, + "outputs": [ + { + "data": { + "text/plain": "" + }, + "execution_count": 416, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "text/plain": "
", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeoAAAHpCAYAAABN+X+UAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAABUF0lEQVR4nO3deXgTBf4G8HeSNEnvpG16l6NcLVDaUgRWQUFBAUEBjxUEUXBxV4HfKqIi6qKguKCuIHigyIqoCwjihauigrJKwQrlhrZQWnpf6ZHmaJL5/VGIlMu0TZNp+n6eJw9kJpl859skb+YWRFEUQURERJIk83QBREREdHkMaiIiIgljUBMREUkYg5qIiEjCGNREREQSxqAmIiKSMAY1ERGRhDGoiYiIJEzh6QKkrLy8Fq44HUxIiD8qKw2tn5CXY5+cx145h31yHnvlHFf2SacLdOpxXKJuY4IAyOUyCIKnK5E29sl57JVz2CfnsVfO8VSfGNREREQSxqAmIiKSMAY1ERGRhDGoiYiIJIxBTUREJGEMaiIiIgljUBMREUkYg5qIiEjCGNREREQSxqAmIiKSMAY1ERGRhDGoiYiIJIxBTUREJGEMaiIiIgljUBMREUkYg5qIiEjCGNREREQSpvB0AR2FXN42v4nsdhF2u9gm0yYiIs9jULcxmUyAXRSh1fq3yfRtdhH6KgPDmojISzGo25ggCJAJArZm5KOi1uTSaYcGqjE+La7xxwCDmojIKzGo3aSi1oTiatcGNREReT/uTEZERCRhDGoiIiIJY1ATERFJGIOaiIhIwiQR1BaLBWPHjkV6ejoA4IknnkCvXr0uut1zzz2O5wwYMOCi8QaDAQBgNpvx5JNPYsCAARgyZAjeffddj8wXERFRa3l8r2+z2Yy5c+ciKyvLMWzBggWYO3eu435BQQGmTp3qCOqSkhLU1tZi+/btUKvVjsf5+fkBAJYuXYpDhw7hvffeQ2FhIR5//HFER0dj1KhRbporIiIi1/BoUGdnZ2Pu3LkQxabHAAcGBiIwMNBx/4knnsCoUaMwYsQIAEBOTg50Oh3i4uIummZ9fT02bdqEt99+G3369EGfPn2QlZWFDz74gEFNRETtjkeDes+ePRg0aBAefvhhpKSkXPIxv/zyC/bu3Yuvv/7aMSw7Oxtdu3a95OOPHTsGq9WK1NRUx7C0tDS8+eabsNvtkMmcX9svCE4/9I+n4YJpOfU67dS5+tv7fLgDe+Uc9sl57JVzPNUnjwb15MmT//Axq1evxoQJExAVFeUYlpOTA6PRiKlTp+LUqVNITEzEk08+ia5du6KsrAxarRZKpdLx+LCwMJjNZuj1eoSEhDhdX2ho4B8/yElqlRJ+fq49e5ha3TiPbXV6Uk9wZc+9HXvlHPbJeeyVc9zdJ49vo76S/Px87N69GwsWLGgy/OTJk6iursYjjzyCgIAAvP3227j33nvx5Zdfwmg0NglpAI77FoulWa9fUVELsZXZqlDIoNH4w2S2oL7e3LqJXcDk0/izrqrKAJvN7tJpu5sgNL75XdFzb8deOYd9ch575RxX9ykszLnAl3RQf/3110hMTET37t2bDF+zZg0aGhrg79+4JPnSSy/huuuuww8//ACVSnVRIJ+7f/6OZ84QRbT6j+F4fhu/+b3lw+WKnncU7JVz2CfnsVfOcXefJHF41uX89NNPuOGGGy4arlQqHSENACqVCrGxsSgpKUFERASqqqpgtVod48vKyqBWqxEUFOSWuomIiFxFskEtiiIOHjyI/v37XzR8xIgR2LJli2NYfX09Tp8+jfj4eCQmJkKhUGD//v2O8RkZGUhKSmrWjmRERERSINlV3wUFBTAYDBet9hYEAcOGDcNrr72GmJgYhISEYPny5YiMjMR1110HuVyO8ePHY+HChXjhhRdQWlqKd999F0uWLPHQnBAREbWcZIO6oqICABAcHHzRuHnz5kGhUGDu3Lmoq6vD4
MGDsXr1asjlcgDA/PnzsXDhQkybNg0BAQGYPXs2brzxRrfWT0RE5AqCeOHZRsihvNw1e31rtf5YsyPL5dejjgxWY8awHqiqMsBqbf97fYeFBbqk596OvXIO++Q89so5ru6TTufcXt/caEtERCRhDGoiIiIJY1ATERFJGIOaiIhIwhjUREREEsagJiIikjAGNRERkYQxqImIiCSMQU1ERCRhDGoiIiIJY1ATERFJGIOaiIhIwhjUREREEsagJiIikjAGNRERkYQxqImIiCSMQU1ERCRhDGoiIiIJY1ATERFJGIOaiIhIwhjUREREEsagJiIikjAGNRERkYQxqImIiCSMQU1ERCRhDGoiIiIJY1ATERFJGIOaiIhIwhjUREREEsagJiIikjAGNRERkYQxqImIiCSMQU1ERCRhDGoiIiIJY1ATERFJGIOaiIhIwhjUREREEsagJiIikjAGNRERkYQxqImIiCSMQU1ERCRhDGoiIiIJY1ATERFJGIOaiIhIwiQR1BaLBWPHjkV6erpj2OLFi9GrV68mt/Xr1zvGf/HFFxgxYgSSk5Px0EMPobKy0jFOFEW89NJLGDx4MAYOHIilS5fCbre7dZ6IiIhcQeHpAsxmM+bOnYusrKwmw3NycjB37lxMmDDBMSwgIAAAcODAASxYsADPPvssEhIS8Pzzz2P+/Pl46623AABr167FF198gZUrV8JqtWLevHkIDQ3FjBkz3DdjRERELuDRJers7GzceeedyMvLu2hcTk4OevfuDZ1O57j5+voCANavX4/Ro0dj/PjxSEhIwNKlS7Fz507k5+cDANatW4c5c+ZgwIABGDx4MB599FF88MEHbp03IiIiV/BoUO/ZsweDBg3Chg0bmgyvq6tDSUkJunTpcsnnZWZmYsCAAY77UVFRiI6ORmZmJkpKSlBUVISrrrrKMT4tLQ0FBQUoLS1tk/kgIiJqKx5d9T158uRLDs/JyYEgCHjzzTfx448/QqPR4L777nOsBi8tLUV4eHiT54SGhqK4uBhlZWUA0GR8WFgYAKC4uPii512JIDRrdq48DRdMy6nXaafO1d/e58Md2CvnsE/OY6+c46k+eXwb9aWcPHkSgiAgPj4eU6ZMwd69e/H0008jICAAI0eOhMlkglKpbPIcpVIJi8UCk8nkuH/+OKBxp7XmCA0NbOWc/E6tUsLPT3TZ9ABArW6cL63W36XT9SRX9tzbsVfOYZ+cx145x919kmRQjx8/HsOHD4dGowEAJCQkIDc3Fx999BFGjhwJlUp1UehaLBb4+vo2CWWVSuX4PwDHNm5nVVTUQmxltioUMmg0/jCZLaivN7duYhcw+TT+rKuqMsBma997tQtC45vfFT33duyVc9gn57FXznF1n8LCnAt8SQa1IAiOkD4nPj4eu3fvBgBERESgvLy8yfjy8nLodDpEREQAAMrKyhAbG+v4PwDodLpm1SGKaPUfw/H8Nn7ze8uHyxU97yjYK+ewT85jr5zj7j5J4jjqCy1fvhz33ntvk2HHjh1DfHw8ACA5ORkZGRmOcUVFRSgqKkJycjIiIiIQHR3dZHxGRgaio6ObtX2aiIhICiS5RD18+HCsXr0aa9aswciRI7Fr1y5s3boV69atAwBMmjQJU6dORUpKCpKSkvD8889j2LBhiIuLc4x/6aWXEBkZCQB4+eWXMX36dI/NDxERUUtJMqj79euH5cuXY8WKFVi+fDliYmLw8ssvIzU1FQCQmpqK5557DitWrEB1dTWuueYaLFq0yPH8GTNmoKKiArNmzYJcLsftt99+0RI6ERFReyCIIrdIXE55uWt2JtNq/bFmRxaKq02uKeysyGA1ZgzrgaoqA6zW9r8zWVhYoEt67u3YK+ewT85jr5zj6j7pdM7tTCbJbdRERETUiEFNREQkYQxqIiIiCWNQExERSRiDmoiISMIY1ERERBLGoCYiIpIwBjUREZGEMaiJiIgkjEFNREQkYQxqIiIiCWNQExERS
RiDmoiISMIY1ERERBLGoCYiIpIwBjUREZGEMaiJiIgkjEFNREQkYQxqIiIiCWNQExERSRiDmoiISMIY1ERERBLGoCYiIpIwBjUREZGEMaiJiIgkjEFNREQkYQxqIiIiCWNQExERSRiDmoiISMIY1ERERBLGoCYiIpIwBjUREZGEMaiJiIgkjEFNREQkYQxqIiIiCWNQExERSRiDmoiISMIY1ERERBLGoCYiIpIwBjUREZGEMaiJiIgkjEFNREQkYQxqIiIiCWNQExERSRiDmoiISMIkEdQWiwVjx45Fenq6Y9j+/ftx1113ITU1FTfddBM2bdrU5Dm33HILevXq1eR24sQJAIAoinjppZcwePBgDBw4EEuXLoXdbnfrPBEREbmCwtMFmM1mzJ07F1lZWY5hZWVl+Mtf/oJJkybhxRdfxOHDhzF//nzodDoMGzYMNpsNubm5WL9+Pbp06eJ4nlarBQCsXbsWX3zxBVauXAmr1Yp58+YhNDQUM2bMcPfsERERtYpHgzo7Oxtz586FKIpNhm/fvh1hYWF45JFHAABdunRBeno6Pv/8cwwbNgxnzpxBQ0MD+vXrB5VKddF0161bhzlz5mDAgAEAgEcffRTLly9nUBMRUbvj0aDes2cPBg0ahIcffhgpKSmO4UOHDkViYuJFj6+rqwPQGPBRUVGXDOmSkhIUFRXhqquucgxLS0tDQUEBSktLER4e7nR9gtCMmfmjabhgWk69Tjt1rv72Ph/uwF45h31yHnvlHE/1yaNBPXny5EsOj42NRWxsrON+RUUFvvzyS8yePRsAkJOTAx8fHzzwwAM4dOgQunbtisceewz9+vVDWVkZADQJ5LCwMABAcXFxs4I6NDSw2fN0OWqVEn5+4h8/sDnTVCsBAFqtv0un60mu7Lm3Y6+cwz45j71yjrv75PFt1H/EZDJh9uzZCAsLw5///GcAwKlTp1BdXY077rgDc+bMwcaNGzFt2jRs27YNJpMJAKBUKh3TOPd/i8XSrNeuqKiF2MpsVShk0Gj8YTJbUF9vbt3ELmDyafxZV1VlgM3WvneWE4TGN78reu7t2CvnsE/OY6+c4+o+hYU5F/iSDmqDwYAHH3wQubm5+PDDD+Hr6wsAWLRoEUwmEwICAgAACxcuxG+//YZPP/0UV199NYDGUD63avxcQJ97vrNEEa3+Yzie38Zvfm/5cLmi5x0Fe+Uc9sl57JVz3N0nSRyedSl1dXWYMWMGsrKy8N577zXZu1uhUDhCGgAEQUB8fDxKSkoQEREBAI5V4Of/X6fTuad4IiIiF5FkUNvtdsyaNQtnzpzB+++/jx49ejQZP3XqVKxcubLJ448fP474+HhEREQgOjoaGRkZjvEZGRmIjo5u1vZpIiIiKZDkqu+PP/4Y6enpeOONNxAUFORYIvbx8YFGo8H111+PVatWITExEV27dsW6detQW1uLCRMmAAAmTZqEl156CZGRkQCAl19+GdOnT/fY/BAREbWUJIP666+/ht1uxwMPPNBk+MCBA/H+++/j3nvvhdlsxuLFi1FeXo7k5GSsXbvWsTp8xowZqKiowKxZsyCXy3H77bfj3nvv9cCcEBERtY4gXni2EXIoL3fNXt9arT/W7MhCcbXJNYWdFRmsxoxhPVBVZYDV2v73+g4LC3RJz70de+Uc9sl57JVzXN0nnc65vb4luY2aiIiIGjGoiYiIJIxBTUREJGEMaiIiIgljUBMREUkYg5qIiEjCGNREREQSxqAmIiKSMAY1ERGRhDGoiYiIJIxBTUREJGEMaiIiIgljUBMREUkYg5qIiEjCGNREREQSxqAmIiKSMAY1ERGRhDGoiYiIJIxBTUREJGEMaiIiIgljUBMREUkYg5qIiEjCGNREREQSxqAmIiKSMAY1ERGRhDGoiYiIJIxBTUREJGEMaiIiIgljUBMREUkYg5qIiEjCGNREREQSxqAmIiKSMAY1ERGRhDGoiYiIJIxBTUREJGEMaiIiIgljU
BMREUkYg5qIiEjCGNREREQSxqAmIiKSMAY1ERGRhDGoiYiIJIxBTUREJGEMaiIiIgmTRFBbLBaMHTsW6enpjmH5+fm49957kZKSgjFjxmDXrl1NnvPzzz9j7NixSE5Oxj333IP8/Pwm4//9739j6NChSE1NxZNPPgmj0eiWeSEiInIllwd1ZWVlsx5vNpvxyCOPICsryzFMFEU89NBDCAsLw+bNm3Hrrbdi1qxZKCwsBAAUFhbioYcewsSJE/Hxxx8jJCQEDz74IERRBAB8/fXXWLlyJZ577jm89957yMzMxLJly1w3k0RERG7SoqBOTEy8ZCAXFBTghhtucHo62dnZuPPOO5GXl9dk+O7du5Gfn4/nnnsO3bp1wwMPPICUlBRs3rwZALBp0yb07dsX06dPR48ePbBkyRIUFBRgz549AIB169Zh2rRpGD58OPr164dnn30Wmzdv5lI1ERG1OwpnH7h161Zs2bIFwO9LvD4+Pk0eU1paCp1O5/SL79mzB4MGDcLDDz+MlJQUx/DMzEz07t0bfn5+jmFpaWnYv3+/Y/yAAQMc43x9fdGnTx/s378fAwYMwMGDBzFr1izH+JSUFDQ0NODYsWNITU11uj4iIiJPczqoR44ciTNnzgBoDNiUlBT4+/s3eYyfnx9Gjhzp9ItPnjz5ksPLysoQHh7eZFhoaCiKi4v/cHxNTQ3MZnOT8QqFAhqNxvF8ZwlCsx5+5Wm4YFpOvU47da7+9j4f7sBeOYd9ch575RxP9cnpoPb393cspcbExGDMmDFQqVRtUpTRaIRSqWwyTKlUwmKx/OF4k8nkuH+55zsrNDSwuaVfllqlhJ+f6LLpAYBa3TiPWq3/Hzyy/XBlz70de+Uc9sl57JVz3N0np4P6fBMmTMDp06dx6NAhNDQ0XDR+/PjxrSpKpVJBr9c3GWaxWKBWqx3jLwxdi8WCoKAgx4+HS4339fVtVh0VFbUQW5mtCoUMGo0/TGYL6uvNrZvYBUw+jT/rqqoMsNnsLp22uwlC45vfFT33duyVc9gn57FXznF1n8LCnAv8FgX1O++8g5deegnBwcEXrf4WBKHVQR0REYHs7Owmw8rLyx2rsyMiIlBeXn7R+MTERGg0GqhUKpSXl6Nbt24AAKvVCr1e36zt5wAgimj1H8Px/DZ+83vLh8sVPe8o2CvnsE/OY6+c4+4+tSio3333XcybNw8zZsxwdT0AgOTkZKxevRomk8mxFJ2RkYG0tDTH+IyMDMfjjUYjjhw5glmzZkEmkyEpKQkZGRkYNGgQAGD//v1QKBRISEhok3qJiIjaSosOzzKbzbjxxhtdXYvDwIEDERUVhfnz5yMrKwurV6/GgQMHcPvttwMAbrvtNvz2229YvXo1srKyMH/+fMTGxjqCefLkyVizZg22b9+OAwcOYOHChbjzzjubveqbiIjI01oU1OPGjcOHH37oOMGIq8nlcrz++usoKyvDxIkT8dlnn2HVqlWIjo4GAMTGxuK1117D5s2bcfvtt0Ov12PVqlUQzu6Kd/PNN+OBBx7AM888g+nTp6Nfv36YN29em9RKRETUlgSxBWn76KOP4r///S+0Wi1iY2MvOp563bp1LivQk8rLXbMzmVbrjzU7slBcbXJNYWdFBqsxY1gPVFUZYLW2/53JwsICXdJzb8deOYd9ch575RxX90mna8Odybp06YK//vWvLXkqERERNUOLgvr8s34RERFR22lRUM+fP/+K45csWdKiYoiIiKgpl1w9y2q14tSpU9i2bRtCQkJcMUkiIiJCC5eoL7fE/M477+DEiROtKoiIiIh+59LrUY8aNQrffvutKydJRETUobksqOvr67Fx40ZotVpXTZKIiKjDa9Gq74SEBMfJRc6nUqmwePHiVhdFREREjVoU1Bee0EQQBPj4+KB79+4ICAhwSWFERETUwqAeOHAgACA3Nxc5OTmw2+3o2rUrQ5qIiMjFWhTUNTU1mD9/Pr777jsEBwfDZrPBY
DDgqquuwqpVqxAYyIuPExERuUKLdiZbvHgxiouLsW3bNqSnp+PXX3/F559/jvr6ep7shIiIyIVaFNTff/89Fi5ciPj4eMew7t2745lnnsF3333nsuKIiIg6uhYFtUqlgkx28VMFQYDNZmt1UURERNSoRUF9/fXX49lnn0VeXp5jWG5uLhYvXozrrrvOZcURERF1dC3amWzevHl46KGHcNNNNyEoKAgAUF1djWuvvRZPP/20SwskIiLqyJod1KdPn0Z0dDTef/99HD9+HDk5OVCpVOjSpQu6devWFjUSERF1WE6v+hZFEYsXL8bo0aOxb98+AECvXr0wZswYbN68GWPHjsWLL74IURTbrFgiIqKOxumgXrduHbZt24ZVq1Y5Tnhyzuuvv45Vq1bhk08+wUcffeTyIomIiDoqp4N648aNePrppzF8+PBLjr/++uvx6KOPMqiJiIhcyOmgLigoQL9+/a74mMGDByM/P7/VRREREVEjp4M6NDQUBQUFV3xMcXExNBpNa2siIiKis5wO6pEjR+K1115DQ0PDJcdbrVasXLkSQ4YMcVlxREREHZ3Th2c9+OCDuP322zFx4kRMnToVffv2RWBgIKqrq3H48GGsX78eBoMBS5cubct6iYiIOhSngzooKAgbN27ESy+9hBdffBFGoxFA42FbgYGBGDNmDGbPno2wsLA2K5aIiKijadYJTzQaDRYvXoxnnnkG+fn5qKmpgUajQadOnSCXy9uqRiIiog6rRacQVSqVPAsZERGRG7ToohxERETkHgxqIiIiCWNQExERSRiDmoiISMIY1ERERBLGoCYiIpIwBjUREZGEMaiJiIgkjEFNREQkYQxqIiIiCWNQExERSRiDmoiISMIY1ERERBLGoCYiIpIwBjUREZGEMaiJiIgkjEFNREQkYQxqIiIiCZNsUG/ZsgW9evW66JaQkAAA+Nvf/nbRuB9++MHx/H//+98YOnQoUlNT8eSTT8JoNHpqVoiIiFpM4ekCLmfMmDEYOnSo477VasW0adMwbNgwAEBOTg6WLVuGP/3pT47HBAcHAwC+/vprrFy5EsuWLUNoaCjmz5+PZcuW4ZlnnnHrPBAREbWWZJeo1Wo1dDqd4/bZZ59BFEU8+uijsFgsOHPmDJKSkpo8RqlUAgDWrVuHadOmYfjw4ejXrx+effZZbN68mUvVRETU7kh2ifp8er0eb7/9NhYvXgylUoljx45BEATExcVd9FibzYaDBw9i1qxZjmEpKSloaGjAsWPHkJqa6vTrCkLra/9PxhnoG+woqzYiwEcOlaJtfhu5olZPOld/e58Pd2CvnMM+OY+9co6n+tQugvqjjz5CeHg4Ro0aBQA4efIkAgIC8Nhjj2HPnj2IjIzE7Nmzcd1116GmpgZmsxnh4eGO5ysUCmg0GhQXFzfrdUNDA1tVd53Zin9uz3bc91fJcXNSFKKCfVs13XPU6sY1CFqtv0umJwWt7XlHwl45h31yHnvlHHf3SfJBLYoiNm3ahPvvv98x7OTJkzCZTBgyZAhmzpyJb7/9Fn/729+wYcMGhIWFAYBjNfg5SqUSFoulWa9dUVELUWxd/avuTMIvedX4IrMQeqMVH2ecwfAeYYgP9WvdhAGYfBp/1lVVGWCz2Vs9PU8ShMY3vyt67u3YK+ewT85jr5zj6j6FhTkX+JIP6oMHD6KkpAQ333yzY9iDDz6IqVOnOnYeS0hIwOHDh7Fx40Y8/PDDAHBRKFssFvj6Nm9JVhTR6j/G1V1DcXP/TogO8MHG3wqRW2nEzuwKhPj5QOPr07qJn8dbPlyu6HlHwV45h31yHnvlHHf3SbI7k53z008/YcCAAY5QBgCZTNbkPgDEx8ejpKQEGo0GKpUK5eXljnFWqxV6vR46nc5tdV9IpZBjRM8wRAerYLWL+O5EOax2fiKIiOjKJB/UBw4cQP/+/ZsMe+KJJzB//vwmw44dO4b4+HjIZDIkJSUhIyPDMW7//v1QKBSOY7A9R
RAEDO8eBrVChsr6Buw7U+3ReoiISPokH9RZWVno3r17k2HXX389Pv/8c2zduhWnT5/GypUrkZGRgSlTpgAAJk+ejDVr1mD79u04cOAAFi5ciDvvvLPZq77bgp9SjiHxIQCAQ0W1qLfYPFwRERFJmeS3UZeXlyMoKKjJsBtvvBH/+Mc/8MYbb6CwsBA9evTAO++8g9jYWADAzTffjIKCAjzzzDOwWCy48cYbMW/ePE+Uf0ldQnwRHqBEaZ0F+wuqcXXXEE+XREREEiWIIncduJzy8tbv2adQyKDV+mPNjiwUV5scwwuqTdh2pBQyAbgzNRqBqub/ZooMVmPGsB6oqjLAam3/e32HhQW6pOfejr1yDvvkPPbKOa7uk07n3F7fkl/17a1igtWIDlLBLjauAiciIroUBrUH9YtuXKV/orQODe38OGgiImobDGoPitWoEahSwGITkVNe7+lyiIhIghjUHiQIAnpHBgAAjpTUgrsLEBHRhRjUHtZT5w+5AFQYGlBW17xTnBIRkfdjUHuY2keOLmfP+51dbvBwNUREJDUMagnoHtZ49auTFfWwc/U3ERGdh0EtAbHBaqgUMhgb7Cg871hrIiIiBrUEyGSC47KX3PubiIjOx6CWiG5hjUF9qrKeV9UiIiIHBrVERAaq4K+Uo8EmooCrv4mI6CwGtUQIgoDO2sare52u5OpvIiJqxKCWkM4hjUGdV2Xk3t9ERASAQS0pUUFq+MgFGBvsPPkJEREBYFBLilwmoJOmcak6l6u/iYgIDGrJObf6+3Sl0cOVEBGRFDCoJSZO4wtBAKpNVlQbGzxdDhEReRiDWmKUChkiA1UAgDM8TIuIqMNjUEtQ7Nnt1Gf0XP1NRNTRMaglKE6jBgAUVpth41nKiIg6NAa1BIX4+cDXRwarXURxjdnT5RARkQcxqCVIEATEnV39nc/V30REHRqDWqJiz67+PqPnDmVERB0Zg1qiYoIbg7rK2IB6i83D1RARkacwqCVK7SNHqL8PAKCwhkvVREQdFYNawqKDzu39zaAmIuqoGNQSFh38+2FaRETUMTGoJSwySAUBQK3Zilqz1dPlEBGRBzCoJUwpl0EXoAQAFHH1NxFRh8Sglrhzq78LGNRERB0Sg1rizgV1UY0ZosjTiRIRdTQMaomLCFBCJgAGiw3VJm6nJiLqaBjUEqeQyxBx9rKXPEyLiKjjYVC3A47DtHiBDiKiDodB3Q6cO/FJUbWJ26mJiDoYBnU7oAtQQiETYLLaUVnf4OlyiIjIjRjU7YBcJiAy6Ox2ap73m4ioQ2FQtxO/n/eb26mJiDoSBnU7ER3cuERdXGOCndupiYg6DAZ1OxHqr4SPXIDFJqKK26mJiDoMBnU7IRMEx/HURdxOTUTUYTCo25EobqcmIupwGNTtSNTZPb+La3nebyKijoJB3Y7o/BuPpzZb7dxOTUTUQUg6qL/99lv06tWryW3OnDkAgCNHjuCOO+5AcnIybrvtNhw6dKjJc7/44guMGDECycnJeOihh1BZWemJWXApmez37dQ8nSgRUccg6aDOzs7G8OHDsWvXLsdt8eLFqK+vx8yZMzFgwABs2bIFqampeOCBB1BfXw8AOHDgABYsWIBZs2Zhw4YNqKmpwfz58z08N67hWP3NHcqIiDoESQd1Tk4OevbsCZ1O57gFBQVh27ZtUKlUeOyxx9CtWzcsWLAA/v7++O9//wsAWL9+PUaPHo3x48cjISEBS5cuxc6dO5Gfn+/hOWq9czuU8frUREQdg+SDukuXLhcNz8zMRFpaGgRBAAAIgoD+/ftj//79jvEDBgxwPD4qKgrR0dHIzMx0R9ltSheghPzseb/L6iyeLofIQSYToFDI2uQmkwmenj0ij1F4uoDLEUURp06dwq5du/DWW2/BZrNh1KhRmDNnDsrKytC9e/cmjw8NDUVWVhYAoLS0FOHh4ReNLy4ublYNggu+GxzTcNH3jFwmICJQicJqM/KqjBe/Tjt1rv72P
h/uIMVeyWQCNBr/NgtUu12EXm+A3e78WiQp9kmq2CvneKpPkg3qwsJCGI1GKJVKvPrqqzhz5gwWL14Mk8nkGH4+pVIJi6VxCdNkMl1xvLNCQwNbNxPnUauU8PNzzarqzqEBKKw240x143ZqrdbfJdOVAlf23NtJsVdfHSxCpcG1a3pC/JUYnRSFkJCAFj1fin2SKvbKOe7uk2SDOiYmBunp6QgODoYgCEhMTITdbse8efMwcODAi0LXYrFArW7cfqtSqS453tfXt1k1VFTUorWbgRUKGTQaf5jMFtTXu2ZP7VBfOQDgZJkBoihCr6+HzWZ3ybQ9RRAa3/yu6Lm3k2Kv5HIZtFp/FFbUobjatTs6moIbP9dVVYZmvc+l2CepYq+c4+o+hYU5F/iSDWoA0Gg0Te5369YNZrMZOp0O5eXlTcaVl5c7VndHRERccrxOp2vW64siWv3HcDzfhW/+8AAV5IIAg8WGnDIDQn0Er/lwuaLnHUVH7FVL5rcj9qml2CvnuLtPkt2Z7KeffsKgQYNgNP6+Hfbo0aPQaDRIS0vDvn37HHs9i6KI3377DcnJyQCA5ORkZGRkOJ5XVFSEoqIix/j2Ti4TEB7YuGo//VSFh6shIqK2JNmgTk1NhUqlwlNPPYWTJ09i586dWLp0Ke6//36MGjUKNTU1eP7555GdnY3nn38eRqMRo0ePBgBMmjQJn376KTZt2oRjx47hsccew7BhwxAXF+fhuXKdc4dp7T7Z/k/kQkRElyfZoA4ICMCaNWtQWVmJ2267DQsWLMCf//xn3H///QgICMBbb72FjIwMTJw4EZmZmVi9ejX8/PwANIb8c889h1WrVmHSpEkIDg7GkiVLPDxHrnXuxCfpJyt4PDURkReT9DbqHj16YO3atZcc169fP3zyySeXfe7EiRMxceLEtirN48IDVZDLBJTWNh6mFXN2CZuIiLyLZJeo6coUMgExZ/eGzcjTe7YYIiJqMwzqdqxzSOPhZhn5es8WQkREbYZB3Y51DmncJp+RX83t1EREXopB3Y7FatTwkQsoqTWjwMUnmSAiImlgULdjPnIZkmM1AIDf8qs9WwwREbUJBnU7Nzg+FADw2xm9ZwshIqI2waBu5wbFhwBo3E5NRETeh0HdzqV11kIhE1Bca0Yht1MTEXkdBnU756dUoHdk4xVYuPqbiMj7MKi9QFonDQCu/iYi8kYMai+QFhcMAPjtDIOaiMjbMKi9QEpMMOQCUFhtQlENt1MTEXkTBrUX8Fcp0DsyCACw53SVh6shIiJXYlB7iUGdNQCA9NN6j9ZBRESuxaD2EoM6awEAe/P0sPO830REXoNB7SX6RgXCXymH3tiAE6V1ni6HiIhchEHtJRRyGfrHNu79zdXfRETeg0HtRc6t/k7nDmVERF6DQe1FBnVpDOr9BdUwNdg8XA0REbkCg9qLdNb6IiJQhQabiH0FPPkJEZE3YFB7EUEQfj9MK1fv0VqIiDxFJhOgUMhcfpPLPROZCo+8KrWZQZ21+OxQCfbkcTs1EXU8MpkAjdYfcpnQJtO3iyJkMgE2m/sOg2VQe5mrzl6gI6vMgAqDBaH+Ss8WRETkRjKZALlMwNaMfFTUuvaUyqGBaoxPi4MgCAAY1NRCWj8leoUH4HhpHfbkVWF0YoSnSyIicruKWhOKq73j2gfcRu2FeDpRIiLvwSVqLzSosxbr9p5Bem4VRFE8u5qGXEUmEyBro+1fAGC3i7DbeRpYImrEoPZCKTHB8PWRodxgwfHSOiREBHq6JK/R1juqAIDNLkJfZWBYExEABrVXUipkGNRZix3ZFfjpZCWD2oXackcV4PedVWQygUFNRAAY1F5raHwodmRXYNfJSvzlT509XY7X8aYdVYhI2rgzmZe6Oj4EAHCkuBblBouHqyEiopZiUHupMH8lekc2rvL++WSlh6shIqKWYlB7sSFnl6p/Olnh4UqIiKilGNRe7NpuoQCAX3KrYOTVtIiI2iUGtRfrq
fNHdLAaZqsdv+Ty3N9ERO0Rg9qLCYKA4d3DAADfnyjzcDVERNQSDGovd33PxqDedbISFqvdw9UQEVFzMai9XN+oQOgClDBYbNibp/d0OURE1EwMai8nEwQMO7v6eztXfxMRtTsM6g5gRK/GoP4hqxxmrv4mImpXGNQdQEpMMCICVTBYbNjFY6qJiNoVBnUHIBME3JQQDgD479FSD1dDRETNwaDuIEYl6gAA/ztViRpTg4erISIiZzGoO4geugB0C/NDg03E9hPlni6nzclkAhQKmctvcjk/MkTkXrzMZQdyc+8IrPjxFD47WIyJ/aI8XU6bkckEaLT+kMuEtnsRoQ2nTUR0HgZ1BzKmdwRW7crF4eJanCitQ8/wAE+X1CZkMgFymYCtGfmoqHXtNaPjwwMxvHckc5qI3EbS6/FKSkowZ84cDBw4EEOHDsWSJUtgNpsBAIsXL0avXr2a3NavX+947hdffIERI0YgOTkZDz30ECoreanHUH8lhnVvvFDH1oPFHq6m7VXUmlBc7dpbdT2v7U1E7iXZoBZFEXPmzIHRaMQHH3yAf/3rX/jhhx/w6quvAgBycnIwd+5c7Nq1y3G77bbbAAAHDhzAggULMGvWLGzYsAE1NTWYP3++B+dGOiYkNa7y/upoCUy8ohYRkeRJNqhPnjyJ/fv3Y8mSJejRowcGDBiAOXPm4IsvvgDQGNS9e/eGTqdz3Hx9fQEA69evx+jRozF+/HgkJCRg6dKl2LlzJ/Lz8z05S5JwVWcNooPVqDPb8PUxHqp1OTa7CIPFigqDBcU1JhTXmFBUY0JOmQGHCqpRY2yAzS56ukwi6gAku41ap9PhnXfeQVhYWJPhdXV1qKurQ0lJCbp06XLJ52ZmZuIvf/mL435UVBSio6ORmZmJuLg4p2twxXZIxzTaeJums7XKBQG3J0dhxY+n8GFGAW5NioQggQ2u50pwZyl2UYTe2IAKQwP0xgZUm6yoMTagxmxFg+1yIVyK13aedNxTygUE+/ogxM8HIX5KhPr5QBeogsIFO7Jdrhee6JVUNGeeO3Kfmou9ctJ5fXJnryQb1EFBQRg6dKjjvt1ux/r16zF48GDk5ORAEAS8+eab+PHHH6HRaHDfffdhwoQJAIDS0lKEh4c3mV5oaCiKi5u3XTY0NLD1M3KWWqWEn59rl8DUaiUAQKv1b9bz7r++B95Nz8fJinocqjRheK/wP36Sm7i052ol/Bp+77mpwYYCvREFeiOKq00oqzXDeoWlYgGA2kcOpULm+FAqzx6eVV5nhl0ELDYRZXUWlNVZABgAAHKZgGiNGp20fugc6o+wAKXTP4aa8zd1Za+uxG4XkV1Wh6NFNcgpM6CizoxakxUKmQA/lRxxWj90Dw/AAJXyop67Qkvf5+e4q0/ewJt61SbvRVXje1Gjadl7saUkG9QXWrZsGY4cOYKPP/4Yhw8fhiAIiI+Px5QpU7B37148/fTTCAgIwMiRI2EymaBUKps8X6lUwmJp3o5AFRW1EFv5d1YoZNBo/GEyW1Bfb27dxC5g8mn88q+qMsBma945vG/tG4EPMgrwxndZSAr1dWldLSEIjV8Srui5XC6DVusPQ70Z2UXVyKsyoqDahMr6i0/04iMTEOqvhNbPB0FqBYLVCgSpfeDrI4NKIbsoYPvEBGP8gE5YszMLuWUG1DfYoDdaUWmwoLK+AWV1FtQ32JBfaUR+pRH/y6lAkFqB+FA/dAv1g9bP54qh7czf1JW9umwdDTb871Qlth8vx568KlQbrX/4HEEAooPU6BLii/hQP6h95K6ppYXvc3f0yVt4U6/Off5Npjb4zlU2vhf1egOsLrhuQliYcz+M2kVQL1u2DO+99x7+9a9/oWfPnujRoweGDx8OjUYDAEhISEBubi4++ugjjBw5EiqV6qJQtlgsjm3YzhJFtPpN63h+G7/5m1vnXf1j8J/fCrAnT4/DRbXoHSmNX9Kt7bne2ID0vCrszqvGt0dKLroIicZXg
aggNSICVQjzVyLYVwFZC9ZhCYIAtY8cah85QvyUiA/1O1u/CL3RioJqk+NWY7Jif0EN9hfUQOvrg57h/uge5g8/5ZWD7I/64Ir35/nsooj001X46kgpdmZXoP68nQ19fWToqQtA11A/6AKUCFApYLOLqDM3zuux0jqcrjQ65jn9tB69IwOQFBX0h/PZHC2ZX1f3yZuxV3/gbG/c3SfJB/WiRYvw0UcfYdmyZbjpppsANH5Jngvpc+Lj47F7924AQEREBMrLm559q7y8HDqdzi01tweRQWrclBiObUdK8cb/cvHabUmeLqnF6i027Mgux3+PlmLP6Sqcv3lZrZAhTuuLOI0aUUFql4bGpQiCAK2fD7R+PugbFYgGmx2nq4w4WV6PfL0RVcYGpJ/WY89pPWI1avTQ+aNziJ9Ltmm3VLWxAZ8fLsHmzEKc0f9+3HlUkAoje4Xjuu6h6B0RAMUVzsqmUMhglsnxzCcH8GueHpX1DThQWIujJXVIiw1Gn8hAyDw4j0TtmaSDeuXKlfjPf/6DV155BaNGjXIMX758Ofbt24d///vfjmHHjh1DfHw8ACA5ORkZGRmYOHEiAKCoqAhFRUVITk52a/1S95c/dcbXx8qwO7cKGfl6pMVpPF2S06w2O9JP6/HV0RLszK6A6bwl5x46f9zYNxL19RbIBLRoidlVfOQydA9rXII2W+04VVGPE2V1KKm1IF9vQr7eBKW8Et3C/NFD54+IIJXbajtSXItN+wvx7fEyx5qHAJUcYxIjcFNiOJKiApu1o2FksBqDumjRSaNGvt6EjPxqlBss2H1aj6xyA4Z3D4PWz6etZofIa0k2qHNycvD6669j5syZSEtLQ1lZmWPc8OHDsXr1aqxZswYjR47Erl27sHXrVqxbtw4AMGnSJEydOhUpKSlISkrC888/j2HDhjVrj++OIFbji/FJkdicWYQ3duXi7buSJbEH+JWU1pqx5UARth4sRoXh980bcRo1Rp8NmHidP7Raf6zZkYXiateemaw1VAoZEiICkBARAL2xAVllBmSVGWCw2HC0pA5HS+rwv1OVMEOGG+K1CPVT/vFEm6nG1ID/Hi3DpweLcKLM4BjeU+ePO1KicVNiOHxbuW1ZEAR0OrsW43ipAXvy9KgwNOCTA8X4U1cNEsIDJP8+I5ISyQb1d999B5vNhjfeeANvvPFGk3HHjx/H8uXLsWLFCixfvhwxMTF4+eWXkZqaCgBITU3Fc889hxUrVqC6uhrXXHMNFi1a5InZkLwZgzvhi8MlyCyswbfHy3BjgnT2AD9HFEVk5Fdj0/5C7Mwud6za1vr64MYEHUYnhqN3ZPOW/jxN4+uDqzppMCAuGIU1ZpworcOpSiMqDA1Y9vVxvARgYGcNxvaJxLDuoa3aMctgsWJ3bhV+yCrHjuwKx9Kzj1zAiJ463J4S3eylZ2cIgoCEiADEadX4MbsSZ6pN2HWyCqW1FlwTH+LR1f1E7Ylkg3rmzJmYOXPmZcePGDECI0aMuOz4iRMnOlZ90+XpAlS4b1Ac3vzfabyy4yT+1CUEgWppvC0abHZsO1KCDzIKcKqi3jE8NSYIt6dEY3iPMPi086tZCYKAmGA1YoLVuMZqR6WxASV1DdiTW4n003qkn9bDRy6gb2QgUmODkRIbjMSIAIReYU8Wg8WK7DIDDhXVNm7WOKNvclx4tzA/3JoUhdGJ4dD4tv2qaH+lAqMSdThQWIu9eXqcKDOgxmTFjQk6qBTt++9H5A7S+EYmj5o6IA5fHSnF6SojVu06hSdG9PBoPaYGG7YeLMb7e/NRWte4etvXR4bRiRG4IyUa3XXuPYbRXZQKGVJigzFjWA8cOFmOzw4UYduREhTWmLGvoAb7CmqA9Maz6wWqFQj180Gw2gcKuQC7CNSZG8+kdqnD0DppfTEkPgQje+nQxwNrHwRBQHJMEML8ldh+ogzFtWZ8dqgEoxN1CFDxa4joSvgJISgVMjwxogf+tukANmcWYWh8K
K6JD3F7HTWmBqxNz8OHvxagytgYNmH+Stw9IBbjkyI71Bd6nNYXD1zTBTOv7ox8vQn7zuix70w19hfUoLDahFqTFbUmKwDjJZ8f5q9En8hAJMcEYWi3UHQJ8XPvDFxGjEaNcX0i8NXRMuiNDfjsUAlGJegQ4u/67fFE3qLjfPPRFQ3opMGfU6OxYV8h/vHVMayf2h+RQWq3vLa+vgH/2VeAjfsLz4YPEB2sxrSrYnFzn8gOvXr03I5ZnbS+uPXsBVXMVhuMMjlyCvSoNjbAahchCAICVXJofZWI0agl/aMmxF+JW5N+D+vPD5dgdGI4wgPdt8c7UXsi3U8zud2ca+NxoLAGR0vq8PjnR/HGHf3a9Ljjsjoz1v96BlsyixyHV3UN8cO9g+JwY0I4dza6DLWPHLFhgdDK2+/JKQJUCozrE45vjpejpNaMbUdLMToxHBEMa6KLdNxFFbqIUiHDC2MTEaxW4EhxLR799PBFZ/ZyhTN6I5Z8m4Vb39mDDzMKYLLakRARgDen9MeG+9IwpncEQ7oDUPvIMTpRh8hAFRpsIr46WoqSWtee8pHIGzCoqYlYjS+WT+wLPx859ubp8fAnh1BtvHjnpJY4XFSD+Z8fwW3v7sWWA0VosIlIjQnC8ol98f6UVIzqG+XRk5OQ+/nIZRiVqENUEMOa6HIY1HSRPlFBeHl8H6gVMuzN0+O+D/fhWElti6ZlttrxzbFSzPzPftz74X5sP1EOuwj8qYsWq/+cjNV3peDqriHt6hhoci0fuQw3JZwX1kcY1kTn4zZquqQBnTRYMykFc7ceRr7ehGkf7MOEflGYNjAOUX+wk5ldFHGoqBbbjpTg62OlqDM3XtxBIRNwU2I4pqTFeu0hVtQyPnIZRiXo8N9jZSiqMeOrI6UYlRiOSDeeUpVIqhjUdFk9wwOwbkoqln2fg2+Pl2FzZhG2ZBbhqk4a9I8LRvcwfwSqFRDFxgs7nK4y4nhpHX7N06Pa9PtlESMDVRjTJwK39Yvinr10WYqzYf31sTIU1pjx36OlGJWoQ2Swe44+IJIqBjVdkdZPiRfGJuK25Ci8szsPv+bpsefs7Ur8lXJc2y0U4/pGIC1Ow23P5BTF2dXgXx8vQ2G1GV8dLYOWx1hTB8egJqekxWmQFqfBGb0RP52sxOGiGuTrTagzNy45B6t9EB2sQg9dAPpFByEpKvCKl0WkK5M70TtnHnMhu12E3S7tY7oUchlu6qXDN8fLUVBtwkcZBRiTEoteWi5ZU8fEoKZmidX4YlL/GAAxni7FK/mrFLCLIoKCfP/wsVpt87fz2+wi9FWGdhHWN/YKw7fHy3Gm2oT71u7F8tv6IjUm2NOlEbkdg5ouSyYTIGvD45nbw9Kdu6l95JAJAj77LR9lNZe/RKdarYTJZLns+EsJDVRjfFocZDKhXfRdIZdhZIIOP+VUILu8HnM+PoiXx/fBwM5aT5dGHYQoiqgxWVFusEBvbIC8QIaSeium9I+GrxvXGDKo6ZJkMgEarT/kbRjU7WXpzhPK68xXvJa2X4OI+nrvP4RJIRNwR/9opOfV4PtjpXhk62H8c1xvj5yLnjqOWpMVx0rrcLKiHjXn7RgLAHvz9EgI9cXQ+FC31cOgpkuSyQTIZQK2ZuSjovbygdFS55bufHzksNl+P/tZS7a7XsgV0yDpUMhkeGNKf8z8917szK7A3K2H8OTInrglKdLTpZGXqapvQEa+HrmVRpxbfJAJjRe5CfHzQVigCrekxGJIVw3gxuULBjVdUUWt6YpLdi11uW2xLdnuelnc09xrqBRyLBvfB89+dRxfHi7Bom9OoLTOjBmDO7nkZDltuZmHm3ikz2y1Y0+eHsdL6hz5GxOsRq9wf8RpfaE8++M/MliNO6+KQ1WVAdY2OL3y5TCoySMutS22JdtdLyU+PBDDe0cyp72Mj1yGf9zUE+EBSqxNz8dbP59GaZ0Zj13fvVVHGLT1Zh5u4pG23Mp67DpZCWNDY/B21vpiQ
KdghPhJ57BABjV51PnbYl213TU0gCdV8VaCIODBIV0R5q/CS99n45MDxThZXo8Xxia2+GQ6bbmZp73twNeR2Owi9pzW41Bx4+mRg9UKDI0PQZQET7DDoCaidufO1GhEBKrwj6+OIbOwBlPX/4bFNyfgqk4t3yO8rTbzkPTUma347kQ5Susa1+D1iwpEWieNZK/ax71uiKhduq57KN6f0h89dP6orG/ArI8P4p1fTsPKJVe6ggK9CVsOFKO0zgKlXMCNvcIwqItWsiENMKiJqB2L0/ri3UkpGNcnAnYReOvn05j+4T5klxk8XRpJUHaZAV8dK4XZaofOX4mJ/aLQOcTP02X9IQY1EbVrah85nhnVC8+O7oVAlQJHS+ow5f0MvPx9jsuupU7t34HCGvyQXQFRBLqF+mFc3wgEqtvH1l8GNRF5hTG9I7Dh3jQM7xEGmwh89FsBhv7ze/w7Pc9xTnrqeERRRHpuFdJP6wEAfaMCMbxHaJuezMnVGNRE5DV0ASosvaU3XrutL+JD/VBjsmLlT7m4+a10/HN7Fk5WcJV4R2K3i9iRXYEDRY17dg/spMHgzhqXHHvvTu1juZ+IqBkGdwnBR9O0+LmgBiu+zcKpynp8nFmEjzOL0D82GDf0DMO13UIRGSS9Q3HINSxWO74+XoYzehMEANd2D0FPXYCny2oRBjUReSW5TMCE1FgMiQ3C3jw9Nu4rxI85FfjtTDV+O1ONZd/noKfOH9d0C8Wfeui4etyLVNSZ8f7efBRWm6GQCRjRMwxx2j++Ip1UMaiJyKsJgoCrOmlxVSctimtM+PZ4GX7KqUBmYQ1OlBlwosyAtbvzAAABKjlC/ZQI9lUgWO0Dja8CGl8fqH3kHp4LclZhtRGzPj6EwmozVAoZRiXoWnwyHKlgUBNRhxEZpMbUq+Iw9ao4VNVb8L9TlcgsrMGRkjqcKKlDndmGOrMRqGr6PJVChiC1AsFqxdl/fRB09v8Mcek4UVqH/9tyCOUGC4LVCtyYoIPG18fTZbUag5qIOiStnxJj+0RifHI0tFp/vL79GA4V1kJvbEC1sQF6oxXVpgbUmW0wW+0oq7OgrO7ic9Gr5DJHaIf4+yDUT4mwACV8GeBulZGvx9yth2Gw2JAQGYhRCToYvGRzBoOaiAiNV+iKCVYj5oJzPVttdlSbrKg5e6s2NTj+b7DYYLbZUWawoMxgQU7F78/zV8oRHaxGTYOIxDA/9IkI4NJ3G/nuRBme3nYMDTYR/eOCsXb6IHycnsugJiLqCBRyGUL9lQj1v/hqSlabHTVmK6qNVlSbrKg0WFBusKD6bIhnlRmw/LusxunIBCRGBKJ/XDD6xwYjJSYYfkoGd2uIooj1v57Baz+egghgeI8wLLklEcFesLr7fAxqIqIWUshlCPFTXnRJxAabHRWGBljsdviqfPBLTgVKas04WFSDg0U1eG9PPnzkAlJjgnFNfAiu6RqCTlrfdnd8rydZbXYsPXsFNQC4MyUajwzvBpXC+378MKiJiFzMRy5DZJAKkcFqzBjWA5WVdcirrMdv+Y2Hhv2Wr0dhjRl78vTYk6fHv3acRKxGjWu6huDqriFIi9NApeD5qC6nzmzF/M+PYvfpKggAHhneDXf1j/F0WW2GQU1E1MYEQUBMsC9ign0xrm8kRFFEXpUR/ztVif+drMRvZ6pxRm/Chn2F2LCvEGqFDAM7azE0PgRDuoUi7BKr3YHGa2nLXHgqTLm86Y8Du12U3HW0s8sMePzzI8irMkKtkGHxzYm4rnuop8tqUwxqImoXLgwRVz2vpdNtDUEQ0DnED51D/DA5LRb1Fhv25lU5gru0zoIfcyrwY04F8G0WekcGYmh8CIZ2C0VPnT8EoTGgNVp/l56zWqv1b3LfZhehrzJIJqy/PFyCJduzYLbaERGowrJbeyMxItDTZbU5BjURSZq/SgG7KCIoqGVnlrowfC7Lg9uH/ZRyXNc9DNd1D4MoisgqM+CnkxX4KacSh4trc
eTs7a2fTyMiUIWh8SEY1jMMI4J88fm+QlTUmlpdg1qthMn0++FnoYFqjE+Lg0wmeDyojQ02vPJDDrYebNwePbiLFotGJ0Dj5107jV0Og5qIJE3tI4dMEPDZb/koq2leIF0YPpcSHx6I4b0jPZnTTQiCgJ7hAegZHoAZgzujvM6MXScr8dPJSqSfrkJJrdlx3nK/T48gTuuLiAAlOml9W3Xstl+DiPp6swvnxDX25lXh+W+yUFDdeM7uv1zdGdMHdWpXV79qLQY1EbUL5XVmFFc3L6idCZ/QAGmfXjIsQIXx/aIwvl8UTA02/Jqvx085lfjpZAXK6iw4XlKH4yWNjw0/G9idQ3yh9fVp13uR15mtWL7zpGMpOiJQhadv7IlBXbQersz9GNRERO2E2keOIfGhGBIfiqfkPVFQb8Oyr47gSHEtyg0NKK2zoLTOgl/zqxGgajyBS3SwGtFB6nZzzLbFasfWg0VYszsPlfUNAIDbk6Pw0NCuCFB1zMjqmHNNRNTOCYKApNhgXNcjDL3CA2AwW5GnN+J0pRGF1SbUmW04XmrA8dLGa3BrfX3OBrcKUUFqKCV2+JfVLmLb4RK8/ctpFNc2rgXppPXFght7oH+sxrPFeRiDmojIC/irFEiMCERiRCAabHYU1ZhRWG1CYbUJFfUNqDI23g4V10IAEOrvg/AAFSICVQgPVMHX99KHgLW1ynoLvjhUgo8zC1FU0xjQugAlZgzuhFv6RsLHA3vlSw2DmojIy/jIZeik9UWns9dgNjXYUHg2uAuqTagxWVFuaEC5oQFHSuoANO55rjt7qtQQPx8oFbI22dtbFEXk601IP12Fn3IqsOd0FWxnX0bj64NpA+Nwe3IUz4t+HgY1EZGXU/vIER/qh/hQPwCNO2qV1JpRWmdBSa0ZFQYL6i02nLYYcbrKCADYfqIca9Pz0S3UD11C/BCjUSMm2BfRwWroApTQ+Pr84V7m1rNL9rmV9citrEdORT0y8vSOVdvn9IkMxMTkKNzYS8eAvgQGNRFRBxOgUiBApUC3sMZjzK02O+psQH55HSrrG1BZb0GV0Yp6iw0Hi2pxsKj2ktNRKWQIViugVMggFwTIZQJEEaizWFFrssJktV/yeT5yAf2igzCwkxY39AxD5xC/NptXb+C1QW02m/Hss8/im2++gVqtxvTp0zF9+nRPl9Um2uLMSp44WxMReYZCLkN0oAoa5e+f+/BAFYb3jcLerDLkVtSj4Oxq88JqEyrrLWiwiTBb7Si9xDW6z6dSyNBZ64suIX7oHOKLpOggpMQE83rdzeC1Qb106VIcOnQI7733HgoLC/H4448jOjoao0aN8nRpLtPaMzY5pR0fh0mXxh925AyZTED38ECE+shgvWDJWBRF1DfYUFXfeG1uq12E1W6H7ew27cCzS+yBKgUC1QrI+D3SKl4Z1PX19di0aRPefvtt9OnTB3369EFWVhY++OADrwrq1pyx6Y9I7WxN1Hr8Yec53vbjSBAE+CsV8Fd6ZYRIjld2+dixY7BarUhNTXUMS0tLw5tvvgm73Q6ZzLt+/bfkjE1/ROpna6Lm4w879/OGH0dt9YNAilfmkiqvDOqysjJotVoolb8fFxgWFgaz2Qy9Xo+QkBCnpiOTAWIr30fnPkORwb4uPx4w5GyYRgapoXDxh7Utp33J6QuASuUDs68caGXPvaovl9LCXp2btkImuPy9eO68y5LquZN9asu/abTWDzJBwO7sMlQbrrwtt7kiNb5I7hyCqGA1fFp73utL9Cpa69emPzLsdhG1tUaXh/W5HxYRbfCde24BRhAa88FdBFFsbRRJz9atW7F8+XL88MMPjmH5+fkYMWIEdu7cicjISA9WR0RE5DzvWgd8lkqlgsXS9NfruftqtdoTJREREbWIVwZ1REQEqqqqYLVaHcPKysqgVqsRFBTkwcqIiIiaxyuDOjExEQqFAvv373cMy8jIQFJSktftSEZERN7NK1PL19cX48ePx8KFC
3HgwAFs374d7777Lu655x5Pl0ZERNQsXrkzGQAYjUYsXLgQ33zzDQICAjBjxgzce++9ni6LiIioWbw2qImIiLyBV676JiIi8hYMaiIiIgljUBMREUkYg9oFzGYznnzySQwYMABDhgzBu+++e9nHHjlyBHfccQeSk5Nx22234dChQ26s1LOa06cdO3bg1ltvRWpqKsaNG4fvvvvOjZV6XnN6dc6ZM2eQmpqK9PR0N1QoDc3p0/HjxzFp0iT069cP48aNw+7du91Yqec1p1fffvstRo8ejdTUVEyaNAmHDx92Y6XSYLFYMHbs2Ct+ntz2fS5Sqz333HPiuHHjxEOHDonffPONmJqaKn711VcXPc5gMIjXXHON+OKLL4rZ2dniokWLxKuvvlo0GAweqNr9nO3T0aNHxT59+ojvvfeemJubK65fv17s06ePePToUQ9U7RnO9up8M2bMEHv27Cnu3r3bTVV6nrN9qqmpEa+++mrxqaeeEnNzc8Xly5eLaWlpYnl5uQeq9gxne3XixAkxKSlJ/OSTT8TTp0+Lzz77rHjNNdeI9fX1HqjaM0wmk/jQQw9d8fPkzu9zBnUrGQwGMSkpqckfc9WqVeKUKVMueuymTZvE66+/XrTb7aIoiqLdbhdHjhwpbt682W31ekpz+rRs2TJxxowZTYZNnz5dfOWVV9q8TiloTq/O+fTTT8W77rqrQwV1c/r03nvviSNGjBCtVqtj2MSJE8UdO3a4pVZPa06v1q5dK06YMMFxv7a2VuzZs6d44MABt9TqaVlZWeItt9wijhs37oqfJ3d+n3PVdytd7pKamZmZsNubXmw9MzMTaWlpEM5eoUcQBPTv37/JGdS8VXP6NGHCBDz66KMXTaO2trbN65SC5vQKAKqqqrBs2TI899xz7izT45rTpz179uCGG26AXC53DNu8eTOuu+46t9XrSc3plUajQXZ2NjIyMmC327FlyxYEBASgU6dO7i7bI/bs2YNBgwZhw4YNV3ycO7/PvfIyl+7UnEtqlpWVoXv37k2eHxoaiqysLLfV6ynN6VO3bt2aPDcrKwu//PIL7rrrLrfV60nNvUzriy++iAkTJqBHjx7uLtWjmtOn/Px89OvXD08//TS+//57xMTE4PHHH0daWponSne75vRqzJgx+P777zF58mTI5XLIZDK89dZbCA4O9kTpbjd58mSnHufO73MuUbeS0Whs8uYH4Lh/4RW8LvfYCx/njZrTp/NVVlZi9uzZ6N+/P2644YY2rVEqmtOrn3/+GRkZGXjwwQfdVp9UNKdP9fX1WL16NXQ6Hd5++21cddVVmDFjBoqKitxWryc1p1dVVVUoKyvDM888g40bN+LWW2/F/PnzUVFR4bZ62wN3fp8zqFupOZfUvNxjO8KlN1ty6dHy8nJMmzYNoihixYoVHeaCKs72ymQy4ZlnnsE//vGPDvEeulBz3lNyuRyJiYmYM2cOevfujXnz5qFLly749NNP3VavJzWnVy+99BJ69uyJu+++G3379sWiRYvg6+uLzZs3u63e9sCd3+cd45uvDTXnkpoREREoLy9vMqy8vBzh4eFuqdWTmnvp0ZKSEtx9992wWCxYt27dRat7vZmzvTpw4ADy8/MxZ84cpKamOrY//uUvf8Ezzzzj9rrdrTnvKZ1Oh/j4+CbDunTp0mGWqJvTq8OHDyMhIcFxXyaTISEhAYWFhW6rtz1w5/c5g7qVmnNJzeTkZOzbtw/i2dOri6KI3377DcnJye4s2SOa06f6+nrcf//9kMlkWL9+PSIiItxcrWc526t+/frhm2++wdatWx03AFi8eDH+7//+z81Vu19z3lMpKSk4fvx4k2EnT55ETEyMO0r1uOb0Kjw8HDk5OU2GnTp1CrGxse4otd1w6/e5y/cj74Cefvpp8eabbxYzMzPFb7/9Vuzfv7/49ddfi6IoiqWlpaLRaBRFsfEwh8GDB4uLFi0Ss7KyxEWLFonXXHNNhzmO2tk+vfLKK
2K/fv3EzMxMsbS01HGrqanxZPlu5WyvLtSRDs8SRef7dObMGTElJUVcsWKFmJubK7766qtiSkqKWFxc7Mny3crZXn355ZeO46hzc3PFZcuWdbhjzs+58PPkqe9zBrUL1NfXi4899piYkpIiDhkyRFy7dq1jXM+ePZscV5eZmSmOHz9eTEpKEm+//Xbx8OHDHqjYM5zt00033ST27Nnzotvjjz/uocrdrznvqfN1tKBuTp9+/fVXccKECWLfvn3FW2+9VdyzZ48HKvac5vRq48aN4qhRo8SUlBRx0qRJ4qFDhzxQsedd+Hny1Pc5L3NJREQkYdxGTUREJGEMaiIiIgljUBMREUkYg5qIiEjCGNREREQSxqAmIiKSMAY1ERGRhDGoiYiIJIxBTSQxr732GqZOnYotW7bg+uuvdwzfsGEDBg8ejNTUVGRlZeHpp59GSkpKh7n8Z1v76quveClHkiSFpwsgoksbM2YMhg0b5ri/bNky3HPPPbjttttQVVWFjRs3YvXq1ejVq5fnivQSBQUF+Pvf/47vvvvO06UQXYRBTSRRarW6ybVta2trMXDgQMTExKCgoAAAcO2110IQBE+V6DV4JmWSMq76JvKw7OxsTJo0CcnJybjnnntQVVUFAE1WfZ9bap42bRqmTp2KqVOnAgASEhLw2muv/eFrTJ06FStXrnS8zuTJk5tcyrC4uBj/93//h4EDB2LQoEFYvHgxLBYLAKChoQFPPfUUBg0ahNTUVPz1r39FSUmJU/P2R69bVFSEv/71r0hOTsb111+PlStXwmazOeb/rrvuwkMPPYS0tDR89tlnsFqteOWVVzBkyBCkpaVhzpw5jn5ZLBYsXrwYgwYNwqBBg/Doo49Cr9cDAM6cOYNevXrhm2++wYgRI5CUlIQHHnjAMf7c5oMbbrgBW7ZscWreiNyFQU3kQRaLBTNnzkRcXBy2bNmCm266CRs2bLjocbt27QLQuP361VdfdYTzrl27MH36dKde66233sJNN92ELVu2ICIiAjNnzoTFYoHFYsG0adNgNBrx/vvv49VXX8WOHTuwdOlSAMAHH3yAvXv34t1338XHH38Mg8GAF154wel5vNzriqKIWbNmITQ0FJ988gmWLFmCzz//HG+++abjufv27UP37t2xceNGDBkyBMuXL8cnn3yCF154ARs2bEBFRQX+8Y9/AABeeeUVHDp0CG+//TbWrVuHurq6i67L/eabb+KVV17B+vXrcfDgQaxduxYAsGnTJse/Y8aMcXreiNyiTa7JRURO+eGHH8TU1NQm17CdM2eOOGXKFHHz5s3i8OHDHcPPv+Te7t27xZ49ezr9OlOmTBEffPBBx/3a2loxJSVF/P7778Xt27eLycnJol6vd4zfuXOn2Lt3b7Gurk5ctGiROG7cOLGqqkoUxcZrOzt72cMrve7PP/8sDh48WLTZbI7x3333nThw4EBRFEVx8+bNYq9evRzX/7Xb7eLAgQObXGYwKytLXLFihVhfXy/26dNHPHbsmGNcdXW1mJCQIB47dkzMz88Xe/bsKf7www+O8S+88IJ43333iaIoOsbn5+c7NV9E7sRt1EQelJ2djS5dusDPz88xLCkpCTt37nT5a/Xv39/x/4CAAHTt2hU5OTmw2+3o0qULgoODmzzWarUiLy8Pf/7zn/Hll19iyJAhGDhwIEaMGIGJEye2+nXVajX0ej3S0tIc4+12O0wmk2N1dmhoqGM7fVVVFfR6Pfr06eN4fPfu3TF79mycOHECDQ0NuOuuu5q8tt1uR25uruM5nTt3blJLQ0OD0/NB5CkMaiIPEy/YkcnHx6dNXkehaPpxt9lskMlkl3y9c9uJbTYbEhMT8f3332PHjh3YsWMHXnnlFXzxxRf44IMPnNqR7XKva7VaER8fj9dff/2i5wQGBgIAVCrVZadzqXo//PDDJj96gMawP7ctuq16S9SWuI2ayIN69OiB3Nxc1NbWOoYdPXq0TV7r2LFjjv/X1
tYiLy8PvXr1QteuXZGbm+sIMwDYv38/FAoFOnXqhK1bt+KHH37A6NGj8c9//hPvvPMOMjIynD7m+EqvW1hYiJCQEHTu3BmdO3fGmTNnsGLFikv+AAgKCoJWq20yvaNHj+Laa69FXFwc5HI59Hq9Y1oBAQFYsmSJU3Vyz3mSMgY1kQddffXViIqKwoIFC5CTk4MtW7Zg27ZtbfJan3/+ObZu3YqcnBwsWLAA0dHRGDRoEK655hrExcXhsccew/Hjx7F7924sWrQIY8eORVBQEGpra/H888/jl19+QX5+Pj7//HNERkZCq9W26nWHDBmCmJgYzJs3D8ePH8evv/6Kp59+Gr6+vpDL5Zec1tSpU7F8+XLs3r0bWVlZeP7555GSkoKAgADccccdWLhwIdLT05GdnY3HHnsMp0+fRmxs7B/W6OvrC6DxR4XBYHC+qURuwKAm8iAfHx+89dZbqK6uxoQJE/DRRx/h7rvvbpPXGjduHP7zn/9g4sSJMBgMePvtt6FQKCCXyx2rn++880488sgjuOGGG/Dcc88BAO6++26MHz8e8+bNw5gxY3DkyBG88cYblw3T5rzuG2+8AbvdjjvvvBOzZ8/Gddddh6eeeuqy05o5cyZuvPFG/P3vf8ekSZMQGRmJRYsWAQCeeOIJ/OlPf8KcOXNw5513QqFQYPXq1U7VGRISgltuuQV///vfHXuAE0mFIF64gYyIvM7UqVMxcOBAzJ49u0O8LpE34RI1ERGRhHGvb6J27vnnn8fHH3982fEPPPCAV70uUUfDVd9E7VxlZWWTvcYvFBwcDI1G4zWvS9TRMKiJiIgkjNuoiYiIJIxBTUREJGEMaiIiIgljUBMREUkYg5qIiEjCGNREREQSxqAmIiKSsP8H2GIqn81sIzoAAAAASUVORK5CYII=" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.displot(\n", + " df_stats_ext_sum,\n", + " x=\"diff_pos_percent\",\n", + " kde=True,\n", + ")" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-07-19T10:28:23.548919Z", + "start_time": "2023-07-19T10:28:22.673989Z" + } + } + }, + { + "cell_type": "code", + "execution_count": 415, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(6, 6))\n", + "sns.heatmap(\n", + " pd.crosstab(df_stats_ext_sum['total_sub'], df_stats_ext_sum['diff_pos']),\n", + " annot=True,\n", + " fmt=\".0f\",\n", + ").set(title='#total_sub vs #diff_pos in sentences')\n", + "plt.show()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-07-19T10:24:00.116236Z", + "start_time": "2023-07-19T10:23:59.016911Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Analyse Syntax Trees (using Stanza & Tree Edit Distance)" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# process saved stanza sentences to original format\n", + "\n", + "def list_to_stanza_sentence(token_list, annotations_list) -> StanzaSentence:\n", + " tokens = []\n", + " for i, (token, anotation) in enumerate(zip(token_list, annotations_list), start=1):\n", + " token_dict = dict(\n", + " id=i,\n", + " text=token,\n", + " lemma=anotation.get('lemma'),\n", + " upos=anotation.get('upos'),\n", + " xpos=anotation.get('xpos'),\n", + " feats=anotation.get('feats'),\n", + " head=anotation.get('head'),\n", + " deprel=anotation.get('deprel'),\n", + " misc='start_char={}|end_char={}'.format(anotation.get('start_char'), anotation.get('end_char')),\n", + " ner=anotation.get('ner'),\n", + " )\n", + "\n", + " tokens.append(token_dict)\n", + "\n", + " sentence = StanzaSentence(tokens=tokens)\n", + " return sentence" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 304, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 22/5160 [00:01<06:32, 13.08it/s]19-Jul 11:30:28 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). 
A tree for this sentence was not created.\n", + " 2%|▏ | 109/5160 [00:06<06:06, 13.76it/s]19-Jul 11:30:33 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 7%|▋ | 386/5160 [00:33<05:47, 13.72it/s]19-Jul 11:31:00 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "19-Jul 11:31:00 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 9%|▉ | 462/5160 [00:42<04:36, 17.01it/s]19-Jul 11:31:09 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 14%|█▍ | 724/5160 [01:06<14:20, 5.15it/s]19-Jul 11:31:32 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "19-Jul 11:31:32 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 14%|█▍ | 738/5160 [01:06<04:37, 15.93it/s]19-Jul 11:31:33 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "19-Jul 11:31:33 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 19%|█▉ | 974/5160 [01:21<02:10, 31.98it/s]19-Jul 11:31:48 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 20%|█▉ | 1015/5160 [01:23<02:57, 23.34it/s]19-Jul 11:31:49 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). 
A tree for this sentence was not created.\n", + " 22%|██▏ | 1123/5160 [01:26<02:24, 27.99it/s]19-Jul 11:31:53 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 23%|██▎ | 1175/5160 [01:29<02:29, 26.71it/s]19-Jul 11:31:55 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 24%|██▍ | 1242/5160 [01:31<02:34, 25.42it/s]19-Jul 11:31:58 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 24%|██▍ | 1260/5160 [01:32<03:43, 17.42it/s]19-Jul 11:31:59 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 27%|██▋ | 1394/5160 [01:39<03:37, 17.31it/s]19-Jul 11:32:06 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "19-Jul 11:32:06 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 30%|██▉ | 1538/5160 [01:46<02:30, 24.05it/s]19-Jul 11:32:13 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "19-Jul 11:32:13 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 38%|███▊ | 1935/5160 [02:11<04:04, 13.19it/s]19-Jul 11:32:37 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). 
A tree for this sentence was not created.\n", + " 41%|████ | 2090/5160 [02:20<02:30, 20.35it/s]19-Jul 11:32:47 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "19-Jul 11:32:47 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 43%|████▎ | 2228/5160 [02:27<02:17, 21.36it/s]19-Jul 11:32:54 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "19-Jul 11:32:54 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 49%|████▊ | 2512/5160 [02:46<04:09, 10.62it/s]19-Jul 11:33:12 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "19-Jul 11:33:12 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 60%|█████▉ | 3092/5160 [03:29<02:34, 13.40it/s]19-Jul 11:33:56 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 65%|██████▌ | 3373/5160 [03:52<02:09, 13.85it/s]19-Jul 11:34:19 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "19-Jul 11:34:19 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 67%|██████▋ | 3443/5160 [03:57<01:31, 18.77it/s]19-Jul 11:34:24 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). 
A tree for this sentence was not created.\n", + " 84%|████████▎ | 4309/5160 [04:38<00:50, 16.71it/s]19-Jul 11:35:05 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 84%|████████▍ | 4339/5160 [04:41<01:39, 8.23it/s]19-Jul 11:35:07 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 86%|████████▌ | 4427/5160 [04:50<01:11, 10.23it/s]19-Jul 11:35:17 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "19-Jul 11:35:17 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 87%|████████▋ | 4473/5160 [04:54<00:52, 13.00it/s]19-Jul 11:35:20 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "19-Jul 11:35:20 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 88%|████████▊ | 4520/5160 [04:58<01:05, 9.77it/s]19-Jul 11:35:25 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 89%|████████▉ | 4613/5160 [05:08<00:47, 11.50it/s]19-Jul 11:35:35 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 91%|█████████ | 4693/5160 [05:16<00:50, 9.33it/s]19-Jul 11:35:42 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). 
A tree for this sentence was not created.\n", + "19-Jul 11:35:42 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 91%|█████████ | 4702/5160 [05:16<00:42, 10.67it/s]19-Jul 11:35:43 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 91%|█████████▏| 4721/5160 [05:19<00:46, 9.50it/s]19-Jul 11:35:46 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "19-Jul 11:35:46 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 92%|█████████▏| 4749/5160 [05:22<00:34, 11.84it/s]19-Jul 11:35:49 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "19-Jul 11:35:49 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 93%|█████████▎| 4815/5160 [05:28<00:38, 8.94it/s]19-Jul 11:35:55 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 97%|█████████▋| 4985/5160 [05:42<00:14, 11.82it/s]19-Jul 11:36:09 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 97%|█████████▋| 5030/5160 [05:47<00:18, 7.12it/s]19-Jul 11:36:14 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). 
A tree for this sentence was not created.\n", + " 99%|█████████▉| 5133/5160 [05:56<00:02, 9.15it/s]19-Jul 11:36:22 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "100%|██████████| 5160/5160 [05:58<00:00, 14.41it/s]\n" + ] + }, + { + "data": { + "text/plain": " unit_id lang_id ted\n0 flores101-main-ukr-100-pe1-1 ukr 14\n1 flores101-main-ukr-100-pe1-2 ukr 4\n2 flores101-main-ukr-100-pe1-3 ukr 9\n3 flores101-main-ukr-100-pe1-4 ukr 12\n4 flores101-main-ukr-100-pe1-5 ukr 14\n... ... ... ...\n5155 flores101-main-vie-106-pe2-4 vie 16\n5156 flores101-main-vie-48-pe1-1 vie 11\n5157 flores101-main-vie-48-pe1-2 vie 27\n5158 flores101-main-vie-48-pe1-3 vie 16\n5159 flores101-main-vie-48-pe1-4 vie 10\n\n[5160 rows x 3 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
unit_idlang_idted
0flores101-main-ukr-100-pe1-1ukr14
1flores101-main-ukr-100-pe1-2ukr4
2flores101-main-ukr-100-pe1-3ukr9
3flores101-main-ukr-100-pe1-4ukr12
4flores101-main-ukr-100-pe1-5ukr14
............
5155flores101-main-vie-106-pe2-4vie16
5156flores101-main-vie-48-pe1-1vie11
5157flores101-main-vie-48-pe1-2vie27
5158flores101-main-vie-48-pe1-3vie16
5159flores101-main-vie-48-pe1-4vie10
\n

5160 rows × 3 columns

\n
" + }, + "execution_count": 304, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# use astred to calculate tree edit distance\n", + "\n", + "df_synt_scores = pd.DataFrame()\n", + "\n", + "langs = {\n", + " 'vie': 'vi',\n", + " 'tur': 'tr',\n", + " 'ukr': 'uk',\n", + " 'ara': 'ar',\n", + " 'ita': 'it',\n", + " 'nld': 'nl',\n", + "}\n", + "\n", + "for _id, x in tqdm(df.iterrows(), total=len(df)):\n", + " pe_tokens = eval(x['tgt_tokens'])\n", + " pe_annotations = eval(x['tgt_annotations'])\n", + " mt_tokens = eval(x['mt_tokens'])\n", + " mt_annotations = eval(x['mt_annotations'])\n", + " mt_tbd_qe = eval(x['mt_tbd_qe'])\n", + " mt_pe_alignments_raw = eval(x['mt_pe_tbd_qe_alignments'])\n", + " mt_pe_alignments_dict = defaultdict(list)\n", + "\n", + " for k, v, score in mt_pe_alignments_raw:\n", + " if k is not None:\n", + " mt_pe_alignments_dict[k].append(v)\n", + "\n", + " mt_pe_alignments_pairs = [(k, v[0]) for k, v in mt_pe_alignments_dict.items() if len(v) > 0 and v[0] is not None]\n", + "\n", + " # fix 2 sentences examples with 2 heads to be 1 headed (match to first head\n", + " def _to_int(x, max_value = None):\n", + " try:\n", + " values = [int(i) for i in x.split('+')]\n", + " if 0 in values:\n", + " val = 0\n", + " else:\n", + " val = values[0]\n", + " except AttributeError:\n", + " val = int(x)\n", + " if max_value:\n", + " # fix index error when some tree indexes greater than tree length\n", + " val = min(val, max_value)\n", + " return val\n", + "\n", + " try:\n", + " mt_annotations = [{**annotation, 'head': _to_int(annotation['head'], max_value=len(mt_annotations)-1)} for annotation in mt_annotations]\n", + " first_head, *other_heads = [i for i, annotation in enumerate(mt_annotations) if annotation['head'] == 0]\n", + " mt_annotations = [{**annotation, 'head': first_head} if i in other_heads else annotation for i, annotation in enumerate(mt_annotations)]\n", + "\n", + " pe_annotations = [{**annotation, 'head': 
_to_int(annotation['head'], max_value=len(pe_annotations)-1)} for annotation in pe_annotations]\n", + " first_head, *other_heads = [i for i, annotation in enumerate(pe_annotations) if annotation['head'] == 0]\n", + " pe_annotations = [{**annotation, 'head': first_head} if i in other_heads else annotation for i, annotation in enumerate(pe_annotations)]\n", + " except ValueError:\n", + " print('VALUE ERROR')\n", + " print('mt_annotations', [i['head'] for i in mt_annotations])\n", + " print('pe_annotations', [i['head'] for i in pe_annotations])\n", + " _df_synt_scores = pd.DataFrame([{\n", + " 'unit_id': _id,\n", + " 'lang_id': x['lang_id'],\n", + " 'ted': None,\n", + " }])\n", + " df_synt_scores = pd.concat([df_synt_scores, _df_synt_scores], ignore_index=True)\n", + " continue\n", + "\n", + " try:\n", + " sent_mt = Sentence.from_parser(list_to_stanza_sentence(mt_tokens, mt_annotations))\n", + " sent_pe = Sentence.from_parser(list_to_stanza_sentence(pe_tokens, pe_annotations))\n", + "\n", + " aligned = AlignedSentences(\n", + " sent_mt,\n", + " sent_pe,\n", + " word_aligns=mt_pe_alignments_pairs,\n", + " )\n", + " ted = aligned.ted\n", + " except IndexError:\n", + " print('INDEX ERROR')\n", + " print('mt_annotations', [i['head'] for i in mt_annotations])\n", + " print('pe_annotations', [i['head'] for i in pe_annotations])\n", + " ted = None\n", + "\n", + " _df_synt_scores = pd.DataFrame([{\n", + " 'unit_id': _id,\n", + " 'lang_id': x['lang_id'],\n", + " 'ted': int(ted),\n", + " }])\n", + " df_synt_scores = pd.concat([df_synt_scores, _df_synt_scores], ignore_index=True)\n", + "\n", + "df_synt_scores['ted'] = df_synt_scores['ted'].astype(int)\n", + "\n", + "df_synt_scores" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-07-19T09:36:35.619460Z", + "start_time": "2023-07-19T09:30:26.771815Z" + } + } + }, + { + "cell_type": "code", + "execution_count": 419, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjgAAAHpCAYAAACP/0bhAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAACQ8klEQVR4nOzdd3xc1Z338c+d3jWSRl2yJEvuvWCqceiJgQCGkBACS0ISEgzZ3XQgnRASyCb7rDEhQCAJvRNCC53QDe7dkqxeZyRN0/SZ+/wxtmwhG6vZkoffm9dFmlvPHMuar8899xxFVVUVIYQQQogMohnvAgghhBBCjDUJOEIIIYTIOBJwhBBCCJFxJOAIIYQQIuNIwBFCCCFExpGAI4QQQoiMIwFHCCGEEBlHAo4QQgghMo5uvAsw3jyeAMMZ6jAnx0pPT9/hK9BRQuphH6mLNKmHNKmHfaQu0vLy7ONdhE8lacEZBkUBrVaDoox3ScaX1MM+UhdpUg9pUg/7SF2kfdrf/3iSgCOEEEKIjCMBRwghhBAZRwKOEEIIITKOBBwhhBBCZBwJOEIIIYTIOBJwhBBCCJFxJOAIIYQQIuNIwBFCCCFExpGAI4QQQoiMIwFHCCGEEBlHAo4QQgghMo4EHCGEEEJkHAk4QgghhMg4EnCEEEIIkXEk4AghhBAi40jAEUIIIUTGkYAjhBBCiIwjAUcIIYQQGUc33gU42iRTSWwOw6jPo6YgGIiNQYmEEEII8XEScIZJRWV7Wy2qOrrzzCypHpsCCSGEEGIQuUUlhBBCiIwjAUcIIYQQGUcCjhBCCCEyjgQcIYQQQmQcCThCCCGEyDgScIQQQgiRcSTgCCGEECLjSMARQgghRMaRgCOEEEKIjCMBRwghhBAZRwKOEEIIITKOBBwhhBBCZBwJOEIIIYTIOBJwhBBCCJFxJOAIIYQQIuNIwBFCCCFExpGAI4QQQoiMIwFHCCGEEBlHAo4QQgghMo4EHCGEEEJkHAk4QgghhMg4EnCEEEIIkXEk4AghhBAi40jAEUIIIUTGkYAjhBBCiIwjAUcIIYQQGUcCjhBCCCEyjgQcIYQQQmQcCThCCCGEyDgScIQQQgiRcSTgCCGEECLjSMARQgghRMaRgCOEEEKIjDNhAs43v/lNfvzjH/e/3rZtG1/4wheYN28eF154IVu2bBmw/7PPPsvpp5/OvHnzWLlyJT09PUe6yEIIIYSYoCZEwHnuued48803+1+HQiG++c1vsnjxYp588kkWLFjAVVddRSgUAmDTpk3ccMMNXHPNNTzyyCP4/X6uu+668Sq+EEIIISaYcQ84Xq+XW265hTlz5vSve/755zEajfzwhz+kqqqKG264AavVyosvvgjA/fffz+c+9znOP/98pk+fzi233MKbb75Jc3PzeL0NIYQQQkwg4x5wfve733HeeedRXV3dv27jxo0sWrQIRVEAUBSFhQsXsmHDhv7tixcv7t+/qKiI4uJiNm7ceETLLoQQQoiJSTeeF3/vvff46KOP+Oc//8kvfvGL/vVut3tA4AHIzc2lpqYGgK6uLvLz8wdt7+joGHYZ9mSoYe07nGPG6toTyVjXw9FM6iJN6iFN6mEfqYu0T/v7H0/jFnCi0Sg///nP+dnPfobJZBqwLRwOYzAYBqwzGAzEYjEAIpHIJ24fjtxc+7D2T6QSOJ3WYV/n43Q6LS7X8K490Qy37jKZ1EWa1EOa1MM+UhdivIxbwLntttuYPXs2S5cuHbTNaDQOCiuxWKw/CB1su9lsHnY5ursDqOrQ9lUUyMo24/X2DfmYgyk0J/H6w6M7yThRlPQvreHUXaaSukiTekiTethH6iJtbz2II2/cAs5zzz2Hx+NhwYIFAP2B5V//+hfnnHMOHo9nwP4ej6f/tlRBQcEBt+fl5Q27HKrKsP/yjeSYg53naDZW9ZAJpC7SpB7SpB72kboQ42XcAs59991HIpHof/373/8eg
O9///t8+OGH3HXXXaiqiqIoqKrKunXr+Na3vgXAvHnzWLt2LStWrACgvb2d9vZ25s2bd+TfiBBCCCEmnHELOCUlJQNeW63pfi3l5eXk5ubyP//zP9x000186Utf4uGHHyYcDvO5z30OgEsuuYTLLruM+fPnM2fOHG666SY+85nPUFZWdsTfhxBCCCEmnnF/TPxAbDYbf/7zn/tbaTZu3Midd96JxWIBYMGCBfzqV79i9erVXHLJJWRlZXHzzTePc6mFEEIIMVEoqvrpvjvq8Qyvk7Ezx8zmpp2jvqc8s6SagG/4T31NBIoCLpd9WHWXqaQu0qQe0qQe9pG6SNtbD+LIm5AtOEIIIYQQoyEBRwghhBAZRwKOEEIIITLOuE7VcDTSqCpTCyaN+jx6jWRLIYQQ4nCRgDNMCtC46+1Rd5qbMfu0MSmPEEIIIQaTZgQhhBBCZBwJOEIIIYTIOBJwhBBCCJFxJOAIIYQQIuNIwBFCCCFExpGAI4QQQoiMIwFHCCGEEBlHAo4QQgghMo4EHCGEEEJkHAk4QgghhMg4EnCEEEIIkXEk4AghhBAi40jAEUIIIUTGkYAjhBBCiIwjAUcIIYQQGUcCjhBCCCEyjgQcIYQQQmQcCThCCCGEyDgScIQQQgiRcSTgCCGEECLjSMARQgghRMaRgCOEEEKIjCMBRwghhBAZRwKOEEIIITKOBBwhhBBCZBwJOEIIIYTIOBJwhBBCCJFxJOAIIYQQIuNIwBFCCCFExpGAI4QQQoiMIwFHCCGEEBlHAo4QQgghMo4EHCGEEEJkHAk4QgghhMg4EnCEEEIIkXEk4AghhBAi40jAEUIIIUTGkYAjhBBCiIwjAUcIIYQQGUcCjhBCCCEyjgQcIYQQQmQcCThCCCGEyDgScIQQQgiRcSTgCCGEECLjSMARQgghRMaRgCOEEEKIjCMBRwghhBAZRwKOEEIIITKOBBwhhBBCZBwJOEIIIYTIOBJwhBBCCJFxJOAIIYQQIuNIwBFCCCFExpGAI4QQQoiMIwFHCCGEEBlHAo4QQgghMo4EHCGEEEJkHAk4QgghhMg4EnCEEEIIkXEk4AghhBAi40jAEUIIIUTGkYAjhBBCiIwjAUcIIYQQGUcCjhBCCCEyjgQcIYQQQmQcCThCCCGEyDgScIQQQgiRcSTgCCGEECLjSMARQgghRMaRgCOEEEKIjCMBRwghhBAZRwKOEEIIITKOBBwhhBBCZBwJOEIIIYTIOBJwhBBCCJFxxjXgNDY2cuWVV7JgwQI+85nPcPfdd/dva25u5oorrmD+/PksX76ct99+e8Cx7777Lueccw7z5s3j8ssvp7m5+UgXXwghhBAT1LgFnFQqxTe/+U2ys7N56qmn+OUvf8mf/vQn/vnPf6KqKitXrsTlcvHEE09w3nnncc0119DW1gZAW1sbK1euZMWKFTz++OPk5ORw9dVXo6rqeL0dIYQQQkwguvG6sMfjYcaMGfziF7/AZrNRUVHB8ccfz9q1a3G5XDQ3N/Pwww9jsVioqqrivffe44knnuDaa6/lscceY/bs2Xzta18D4Oabb+bEE09kzZo1HHvsseP1loQQQggxQYxbC05+fj7/+7//i81mQ1VV1q5dy4cffsiSJUvYuHEjM2fOxGKx9O+/aNEiNmzYAMDGjRtZvHhx/zaz2cysWbP6twshhBDi023cWnD2d+qpp9LW1sYpp5zCWWedxW9+8xvy8/MH7JObm0tHRwcAbrf7E7cPh6IMf9/hHDNW155IxroejmZSF2lSD2lSD/tIXaR92t//eJoQAef//u//8Hg8/OIXv+Dmm28mHA5jMBgG7GMwGIjFYgCH3D4cubn2Ye2vJuPYHeZhX+fjFEXB5RretSea4dZdJpO6SJN6SJN62EfqQoyXCRFw5syZA0A0GuX73/8+F154IeFweMA+sVgMk8kEgNFoHBRmYrEYDodj2Nfu7g4w1L7JigI5ThMBf3jIxxxMcalKtycwupOME
0VJ/9IaTt1lKqmLNKmHNKmHfaQu0vbWgzjyxrWT8YYNGzj99NP711VXVxOPx8nLy2P37t2D9t97W6qgoACPxzNo+4wZM4ZdDlVl2H/5RnLMwc5zNBuresgEUhdpUg9pUg/7SF2I8TJunYxbWlq45ppr6Ozs7F+3ZcsWcnJyWLRoEVu3biUSifRvW7t2LfPmzQNg3rx5rF27tn9bOBxm27Zt/duFEEII8ek2bgFnzpw5zJo1i+uvv57a2lrefPNNbr31Vr71rW+xZMkSioqKuO6666ipqeHOO+9k06ZNXHTRRQBceOGFrFu3jjvvvJOamhquu+46SktLj9Aj4ipWiw6rdXSLEEIIIQ6fcfuk1Wq13H777dx444188YtfxGw2c9lll3H55ZejKAq33347N9xwAytWrKC8vJzVq1dTXFwMQGlpKatWreI3v/kNq1evZsGCBaxevRrlCHVX9ze2wWibXCvHpChCCCGEOIBxbUooKCjgtttuO+C28vJy7r///oMeu2zZMpYtW3a4iiaEEEKIo5hMtimEEEKIjCMBRwghhBAZRwKOEEIIITKOBBwhhBBCZBwJOEIIIYTIOBJwhBBCCJFxJOAIIYQQIuNIwBFCCCEyVEtLC9OmTaOlpWVcyzFt2jQ++OCDA2774IMPmDZt2phfU+YMEEIIIcRh9fbbb5OVlXVErykBRwghhBCHVV5e3hG/ptyiEkIIIT4FamtrufLKK1mwYAFz5szhy1/+MnV1dUD6NtGpp57Kgw8+yNKlS5k/fz4/+MEPiMVi/cc/88wznH766cybN4/vfe97fPe732XVqlVDuvb+t6iCwSDf/e53WbBgAWeddRabN28e+zeLBBwhhBAi46mqyre+9S1KSkr4xz/+wcMPP0wymeTWW2/t36erq4t//etf3H333axatYqXXnqJp59+GoCPPvqI66+/nq9//es8+eSTmM1mnn/++RGV5ec//zm7d+/m/vvv5yc/+Qn33nvvWLzFQSTgCCGEEBkuEonwpS99iR//+MdMmjSJWbNmccEFF1BbW9u/Tzwe5yc/+QnTpk1j6dKlLF26tL915aGHHmL58uV86Utfoqqqil/84hcUFhYOuxyBQIAXXniBn/zkJ8yaNYulS5dy9dVXj9n73J/0wRFCCCEynNls5pJLLuHpp59my5Yt7N69m23btuFyuQbsV15e3v+9zWYjkUgAsHPnTr74xS/2b9PpdMyePXvY5aivryeZTDJ9+vT+dXPmzBn2eYZCAo4QQgiR4UKhEN/4xjfIzs7m1FNP5ZxzzmH37t3cc889A/YzGAwDXquqCoBWq+3//uPbRuvj1xwrEnCEEEKIDLdmzRq6urr45z//iU6X/uh/++23hxxSqqur2bp1a//rZDLJ9u3bB7TEDMXkyZPR6/Vs3ryZ448/HoBt27YN6xxDJQFHCCGEyHCzZs0iFArxyiuvMHv2bN577z0eeOABbDbbkI7/yle+wmWXXcbixYtZtGgRDzzwAK2trSiKMqxy2Gw2zjvvPG688UZuvvlmIpEIt91220je0iFJJ2MhhBAiw+Xl5bFy5Up++ctf8vnPf54nn3ySn/3sZ3R3d9PZ2XnI4xcsWMDPf/5zVq9ezQUXXEAwGGTBggXo9fphl+WnP/0pCxYs4Ktf/So//vGP+cpXvjKSt3RIijpWN9GOUh5PgKHWgKJAbraRLa8/BKOstVmnXEJ3b3R0JxknigIul31YdZeppC7SpB7SpB72kbpI21sPR7tNmzZhs9mYPHly/7qzzz6bK6+8khUrVoxjyQ5OWnCEEEII8YnWr1/PVVddxbp162hubuaOO+6gvb2dpUuXjnfRDkr64AghhBDiE1166aW0tLRw7bXXEggEmDFjBnfddRd5eXmsWLGC+vr6gx571113sXjx4iNY2jQJOEIIIYT4RDqdjhtuuIEbbrhh0LbbbruNeDx+0GMLCgoOZ9EOSgKOEEIIIUasuLh4vItwQNIHRwghh
BAZRwKOEEIIITKOBBwhhBBCZBwJOEIIIYTIOBJwhBBCCJFxJOAIIYQQR6kjORnBWF7rySef5NRTTx2z8x2IPCYuhBBCHKUURaHLHyGWTB3W6xi0GvIdpsN6jbEmAUcIIYQ4isWSKWKJwxtwjkZyi0oIIYQQY66lpYVp06bR0tLSv27VqlVcdtllA/ZLpVJ85zvf4bzzzsPv97Nq1SquvvpqLr30UpYsWcKaNWtGdH1pwRFCCCHEuPnNb37Djh07ePDBB3E4HAC8+uqr/OIXv2D+/PlUVlaO6LwScIQQQggxLu666y5efPFFHnroIVwuV/96l8vFJZdcMqpzyy0qIYQQQhxxXV1d/PGPf8RgMJCXlzdgW0lJyajPLwFHCCGEEGNOUZRB6xKJxIDtf/nLX1BVlT/96U8D9jMajaO+vgQcIYQQQow5vV4PQF9fX/+6/Tsc5+Xlcfzxx/ODH/yAe+65h8bGxjG9vgQcIYQQ4ihm0Gow6A7zoh1+XHC5XBQVFfGXv/yF5uZmnnzySd54441B+y1fvpz58+dz4403jkFt7DPmnYx7enrIyckZ69MKIYQQ4mNUVT1iA/CpqnrA204Ho9FouOmmm7jxxhtZvnw5xx9/PN/61rf497//PWjfG264gRUrVvDSSy+NWXkVdQRjL8+YMYN33nlnUJBpbW3lnHPOYf369WNWwMPN4wkw1BpQFMjNNrLl9YdglCNWzzrlErp7o6M7yThRFHC57MOqu0wldZEm9ZAm9bCP1EXa3noQR96QW3CefvppnnzySSCd4lauXNl/f22vrq6uQT2hhRBCCCGOtCEHnDPOOKO/c9CaNWuYP38+Vqt1wD4Wi4UzzjhjbEsohBBCCDFMQw44VquVa665Bkg/n758+fIxeYxLCCGEEGKsjaiT8QUXXEBjYyNbtmwhHo8P2n7++eePtlxCCCGEECM2ooBz99138/vf/56srKxBt6kURZGAI4QQQohxNaKAc8899/CDH/yAK6+8cqzLI4QQQggxaiMa6C8ajXLmmWeOdVmEEEIIIcbEiALOueeey4MPPsgIhtARQgghhDjsRnSLKhgM8vjjj/Pss89SWlo6aDycv//972NSOCGEEEKIkRhRwKmoqOBb3/rWWJdFCCGEEMOhqunhkjPtWmNgRAFn73g4QgghhBhHigKBDkgMHrJlTOn0YC88vNcYYyMKONddd90nbr/55ptHVBghhBBCDFMiDsmjc27Dw2lEnYw/LpFIUF9fz/PPPy8ziQshhBACgLVr13LJJZcwb9485s+fzze+8Q26urp48skn+dKXvsTKlStZtGgRzzzzDMFgkOuuu47jjz+e2bNn89nPfpZXXnllxNceUQvOwVpo7r77bnbt2jXiwgghhBAiMwQCAa666iquuOIKbrnlFrq6urj++uu58847mTlzJuvXr+db3/oW3/3ud8nOzuamm26ivr6ee+65B7PZzN13380NN9zAySefjMFgGPb1x6QFZ6/PfvazvPzyy2N5SiGEEEIchSKRCFdffTUrV66krKyMRYsWceaZZ1JTUwOkZz749re/TVVVFTk5ORxzzDH86le/YsaMGVRUVPC1r30Nr9dLd3f3iK4/ohacAwmFQjz66KNkZ2eP1SmFEEIIcZTKy8vj/PPP569//Svbt2+ntraWnTt3snDhQgByc3MxmUz9+59//vm88sorPProo+zevZutW7cCkEwmR3T9EQWc6dOnoxzgUTGj0civf/3rERVECCGEEJmjs7OTCy+8kFmzZnHCCSdw8cUX88Ybb7Bx40YgnRn298Mf/pD169dz3nnncckll5CXl8cXv/jFEV9/RAHn4wP5KYqCXq+nuroam8024sIIIYQQIjO8/PLLZGVl8ec//7l/3X333XfAWRCCwSDPPvssjz76KHPnzgXgzTffBBjxrAkjCjhLliwBoKGhgbq6OlKpFJWVlRJuhBBCiCNNpz/0PuNwDafTSVtbG++99x6lpaW88
MILvPTSS8yZM2fQvgaDAbPZzEsvvUROTg719fX86le/AiAWi42syCM5yO/3c9111/Hqq6+SlZVFMpmkr6+PY445htWrV2O320dUGCGEEEIMg6oeuQH4hjmS8ec+9zk+/PBDvvOd76AoCnPmzOFHP/oRq1atGhRaDAYDt956K7/73e+47777KC0t5dvf/jb/+7//y/bt26mqqhp2cRV1BG0/P/zhD6mrq+PWW29l8uTJANTW1vLjH/+YqVOn8pvf/GbYBRkvHk+AodaAokButpEtrz8Eo5xndNYpl9Dde3QOzKQo4HLZh1V3mUrqIk3qIU3qYR+pi7S99SCOvBG14Lz22mvce++9/eEGoLq6mp/97Gd84xvfGLPCTVRmk360+QaOnuk8hBBCiKPOiAKO0WhEoxk8hI6iKCN+nOto0usOf6r/RSKEEEJMdCMa6O/UU0/ll7/8JU1NTf3rGhoa+PWvf82yZcvGrHBCCCGEECMxohacH/zgB6xcuZKzzjoLh8MBgM/n4+STT+anP/3pmBZQCCGEEGK4hh1wGhsbKS4u5r777mPnzp3U1dVhNBqpqKgYUS9nIYQQQoixNuRbVKqq8utf/5rPfe5zrF+/HoBp06axfPlynnjiCc455xx++9vfjnhAHiGEEEKIsTLkgPP3v/+d559/ntWrV/cP9LfX7bffzurVq3nqqad46KGHxryQQgghhBDDMeSA8+ijj/LTn/6UU0455YDbTz31VL7//e9LwBFCCCHEuBtywGltbe2fH+JgjjvuOJqbm0ddKCGEEEKI0RhywMnNzaW1tfUT9+no6MDpdI62TEIIIYQYgiPZ73W412ppaWHatGm0tLTQ3NzcP3nmkTLkp6jOOOMMVq1axT333INeP3jSrUQiwW233cZJJ500pgUUQgghxIEpioI75CaejB/W6+i1evIsecM6pqioiLfffpucnByuuOIKlixZckTHyhtywLn66qu56KKLWLFiBZdddhmzZ8/Gbrfj8/nYunUr999/P319fdxyyy2Hs7xCCCGE2E88GSeWGtmM24eTVqslL294oWgsDTngOBwOHn30UX7/+9/z29/+lnA4DKSbrOx2O8uXL+faa6/F5XIdtsIKIYQQ4ujQ0tLCaaedxgUXXMCaNWv6l/vuu4+1a9fy+9//nm3btqEoCscccww33XQT+fn5Y3b9YQ3053Q6+fWvf83PfvYzmpub8fv9OJ1OJk2ahFarHbNCCSGEECIzXHHFFTQ0NLBgwQKuuuoqAoEAV111FVdccQW33HILXV1dXH/99dx555385Cc/GbPrjmiqBoPBIKMWCyGEEOKQbDYber0ei8WC0+nE7XZz9dVX89WvfhVFUSgrK+PMM89k06ZNY3rdEQUcIYQQQoiRyMvL4/zzz+evf/0r27dvp7a2lp07d7Jw4cIxvY4EHCGEEEIcMZ2dnVx44YXMmjWLE044gYsvvpg33niDjRs3jul1JOAIIYQQ4oh5+eWXycrK4s9//nP/uvvuu2/Mx/SRgCOEEEIcxfTawWPTTbRrWCwWGhoa6O7uxul00tbWxnvvvUdpaSkvvPACL730EnPmzBmj0qaNa8Dp7Ozkpptu4v3338doNLJ8+XK++93vYjQaaW5u5qc//SkbNmyguLiY66+/fsAggu+++y6/+c1vaG5uZt68edx0002UlZWN47sRQgghjixVVYc9AN9orqUoyoiO/cIXvsD111/P17/+dR5//HE+/PBDvvOd76AoCnPmzOFHP/oRq1atIhaLYTAYxqS8inokx3nej6qqfOlLX8LhcPDDH/4Qn8/H9ddfz2mnncYPf/hDzjvvPKZOncq3v/1tXnnlFf70pz/x/PPPU1xcTFtbG2effTbXXnstS5cuZfXq1dTV1fHMM88Mu/I9ngBDrQFFgdxsIx89c9+QjzmYxeddRndPdHQnGSeKAi6XfVh1l6mkLtKkHtKkHvaRukjbWw/iyBu3Fpzdu3ezYcMG3nnnn
f7BAb/zne/wu9/9jpNPPpnm5mYefvhhLBYLVVVVvPfeezzxxBNce+21PPbYY8yePZuvfe1rANx8882ceOKJrFmzhmOPPXa83pIQQgghJohxCzh5eXncfffdg0Y+DgaDbNy4kZkzZ2KxWPrXL1q0iA0bNgCwceNGFi9e3L/NbDYza9YsNmzYMOyAM5wGn/59FRhZI91+5xrmtSeSveU+Wss/lqQu0qQe0qQe9pG6SPu0v//xNG4Bx+FwsHTp0v7XqVSK+++/n+OOOw632z1ouObc3Fw6OjoADrl9OHJzh9d0qCZjWC1jc3/waG+2HG7dZTKpizSphzSph32kLsR4mTBPUd16661s27aNxx9/nL/+9a+DOhkZDAZisfRkYuFw+BO3D0d39/D64OQ4jfSFYjAG95Q9nsDoTzIOFCX9S2s4dZeppC7SpB7SpB72kbpI21sP4sibEAHn1ltv5W9/+xt//OMfmTp1KkajEa/XO2CfWCyGyWQCwGg0DgozsVgMh8Mx7GurKsP/yzeSYwaf4qj/Sz+iustQUhdpUg9pUg/7SF2I8aIZ7wLceOON3Hvvvdx6662cddZZABQUFODxeAbs5/F4+m9LHWz7eE7LLoQQQoiJY1wDzm233cbDDz/MH/7wB84+++z+9fPmzWPr1q1EIpH+dWvXrmXevHn929euXdu/LRwOs23btv7tQgghhPh0G7eAU1dXx+233843vvENFi1ahNvt7l+WLFlCUVER1113HTU1Ndx5551s2rSJiy66CIALL7yQdevWceedd1JTU8N1111HaWmpPCIuhBBCCGAcA86rr75KMpnkT3/6EyeddNKARavVcvvtt+N2u1mxYgXPPPMMq1evpri4GIDS0lJWrVrFE088wUUXXYTX62X16tUjHmFRCCGEEJll3EYynihkJOPhkxFK95G6SJN6SJN62EfqIu1wj2Q8mukTxvNaTz75JLfddhuvvfbaAbf/+Mc/BuC3v/3tiK8xIZ6iEkIIIcTwKYpCvKsLNR4/vNfR69F/bPy5iU4CjhBCCHEUU+Nx1BGMA5fpxv0xcSGEEEJknpaWFqZNm8ZLL73E6aefzpw5c7jqqqsGjXMH8NFHH3H++eczd+5c/vM//5NwODzq60vAEUIIIcRhc8cdd/CHP/yB+++/n82bN3PvvfcO2N7T08NVV13FCSecwNNPP011dTUvvvjiqK8rt6iEEEIIcdh85zvfYe7cuQCce+65bN68mfLy8v7tL7zwAjk5OfzgBz9AURSuvfZa3nzzzVFfV1pwhBBCCHHY7B9mbDYb8Y91iK6trWX69OkDntCaM2fOqK8rAUcIIYQQh41erz/kPh8fsWYoxxyKBBwhhBBCjJspU6awbds2kslk/7rt27eP+rwScIQQQoijmKLXoxgMh3cZgxaVgzn77LMJh8PcdNNN7N69m7vvvnvAfJMjJZ2MhRBCiKOUqqpHbAC+wzVqclZWFnfffTe/+MUvOO+88zjmmGM477zzBt22Gi4JOEIIIcRR6kjOwTjca5WWlrJz584B66699tr+71esWNH//axZs3jsscdGV8CPkVtUQgghhMg4EnCEEEIIkXEk4AghhBAi40jAEUIIIUTGkYAjhBBCiIwjAUcIIYQQGUcCjhBCCCEyjgQcIYQQQmQcCThCCCGEyDgScIQQQoij1GinMzjc19q+fTvr1q07DKU5NJmqQQghhDhKKYpCny9KKpE6rNfR6DRYs4zDPm7lypVcc801LFy48DCU6pNJwBFCCCGOYqlEimTicLfkHN4AdTjILSohhBBCjLnLLruM1tZWrrvuOk499VSmTZs2YPuPf/xjfvzjHwOwatUqrr76ai699FKWLFnCmjVrRn19CThCCCGEGHOrVq2isLCQ66+/nuuvv/6Q+7/66qucc845/O1vf2Pu3Lmjvr7cohJCCCHEmHM6nWi1Wux2O3a7/ZD7u1wuLrnkkjG7vrTgCCGEEGLclZSUjOn5JOAII
YQQ4rBSFGXQukQiMeC10Tj8p7Q+iQQcIYQQQhxWer0egGAw2L+upaXlsF5TAo4QQghxFNPoNGh1ymFdNLqRxQWLxcLu3bspKCjAZDJxxx130NzczN133822bdvGuCYGkk7GQgghxFFKVdURDcA30msd6FbTJ7nkkkv4/e9/T0NDAzfeeCN//OMfue+++zjjjDO49NJL6e3tPUyllYAzKjne9RR1vQpASmMkqTGQ0hoJWCrpdC0FRRrIhBBCHD7DDRxH+lqXXnopl156af/rz3/+8wfc79prrx1xuQ5GAs4IZXs3UNV0PwqDR4/M8W3CFHPTWHwhHMEfPiGEEEKkScAZgSz/dqqaH0BBxZ19DN3OhWhTUTSpKKaoh+KuVyjofhcVLU3F50vIEUIIIY4wCTjD1fgu1Q33olGTdGfNp770i4NuRUUNOUxueYTC7rdQFQ3NRZ+XkCOEEEIcQdJJZBh0nRvh4UvRqAm89hnsLvvyAfvZeHKOpb7kCwAUed6ktOM5OIJT2gshhBCfdhJwhkiJBXH88ysosSB+axU15f+Bqjl4A5g793gaii8EoNj9GoWeN49UUYUQQohPPQk4Q5WMQiqJWnYsNZVXomoMhzyky3UiTUXpHuMlHS+ij3kPcyGFEEIIARJwhkw159LztXXwH/8kpTUN+bgO1zIClkq0aoyyjmcPYwmFEEIIsZcEnOHQmYbfWVhR0k9SAS7vOqx9DWNeLCGEEEIMJAHnCOizlOHOPgaA8vanQU2Nb4GEEEKIDCcB5whpKTybpMaILdRErnfdeBdHCCFEBlCP4BO6Y3WtlpYWpk2bdtgn25RxcI6QuN5BW/5plHU8T1nHcxD7DVL9QgghRkNRFIK9PSQTicN6Ha1Ohy07Z0zOVVRUxNtvv01Oztic72DkE/YI6nAtI7/7fYzxHtR3b4O5/zXeRRJCCHGUSyYSJOPx8S7GkGm1WvLy8g77deQW1RGkavQ0FZ2bfvHebWiCbeNbICGEEOIw+e///m9+9KMfDVj3ve99jyuuuGLALSq/388PfvADFi5cyEknncSNN95IJBIZ9fUl4BxhvVlzCVgqURIRzBvuHu/iCCGEEIfF2Wefzeuvv058T+tSLBbj9ddf5+yzzx6w3w033EAgEOChhx7i9ttvZ/PmzfzqV78a9fUl4BxpikJb/ukAmLY9gBL1jXOBhBBCiLF38sknk0ql+OCDDwB4++23MZlMHHvssf37NDU18corr3Drrbcybdo05s6dy4033shTTz1FIBAY1fWlD8448Nmno+bNQOPejmnLfYQXXTPeRRJCCCHGlMFg4PTTT+ell17ipJNO4qWXXuKss85Co9nXtlJXV0cqleLkk08ecGwqlaKxsZHZs2eP+PrSgjMeFAX1+JUAmDfdk54GQgghhMgwy5cv59VXXyUWi/Haa6+xfPnyAduTySR2u52nn356wPLSSy9RXV09qmtLwBkvsy8gaS1EG+rCtPOp8S6NEEIIMeZOOOEEkskk9957LyaTicWLFw/YXllZSSAQQFEUysvLKS8vJxKJcMsttxCLxUZ1bQk440VrIDzv6wCYN9whoxsLIYQYEa1Oh1avP7yLbmQ9WnQ6HWeeeSZ33HEHn/3sZ1E+Nt1RVVUVS5cu5fvf/z6bNm1i69atXHfddYRCIRwOx6jqRfrgjIKqqrSnevGpfQTUEAE1TEANk63YOFY/DZPyyTOOR2ZdiuWj/4eutxZDw6vEKs84QiUXQgiRCVRVHbMB+IZyrY8HlKE4++yzeeSRRwY9PbXXLbfcwq9//WuuuOIKdDodS5cu5Sc/+cloiysBZ6Takt38LfoqtckDj2XzWPQtlhnmcJp+Pjka+wH3UQ12IrO+gmX9nzCvv0MCjhBCiGEZSeA40tc69thj2blzZ//r0tLSAa9zcnL4wx/+MOryfZwEnGGKJWM8HXmP52IfkiSFHi0uTRZ2xYxdMWNTTOxItNKp9vJibC0vxdZxjG4q5xuPJ
1/jHHS+8LwrMW+8G0P7B+g61pIoXHTk35QQQgiRYSTgDMPmno388a3fUR9rAGCetpKvmE4d1EKTUlU2Jet5ObaOHckWPkjsZGuyif8yn0eltnDgvtZCIlNXYN7xCJb1d+D/3F1H6u0IIYQQGUs6GQ+RO+Lmex98h3p/Aw7FwrdMy7nW/PkD3n7SKArzdZP5geUifmb5MuWafIJqmFtDT7Al0Tho//CCqwAw1P8Ljb/psL8XIYQQItNJwBkim87GMa5juWTaF7nJdjnH6KcO6X5kuTafH1guYqZ2ElHi/F/4H7wf3zFgn2TOVGJlJ6OoKcyb/3a43oIQQgjxqSEBZ4jMOjM3HXML1x3zQ6yKaXjHKgb+03wex+imkiTFXZEXeWDHgwP2Cc+9EgDTtocg1jdm5RZCCCE+jSTgHCE6Rcs3TZ/jNP18AG756H/4V8vz/dtj5aeQyKpAE/Nj2vXkOJVSCCGEyAwScI4gjaJwiXEZyw3pkRz/uOUWav270hsVDZE5XwX2TN+gquNVTCGEEOKoJwHnCFMUhQsMJ7K0+ERiqRg/X3s9gbgfgMiMi0npbeh6a9C3vDXOJRVCCCGOXhJwxoFGUbjpxBspNBfRHm7j5g2/IqWm0gP/zbgYAPPGv4xzKYUQQoijlwSccZJlzOKXC3+DQWPgffe7PFCXfnoqMucKAIyNr6L17h7HEgohhBBHLwk442hK1jT+c9b3Afjrrrv50P0+SedkouWnAmDa/NdxLJ0QQghx9JKAM84+V3YO55Sdh4rKzRt/hS/m2/fI+PZHUWKBcS6hEEIIcfSRgDMBXDPzv6iwVeKNeblj+yriZSeTyK5GEw9i2v7oeBdPCCGEOOpIwJkADFoj35vzYxQU/tX6PB961hCe+zUATJvvBTU1ziUUQgghji4ScCaIWdlzuKDiIiA9Po63ajkpgwOdrwFD4+vjXDohhBDi6CIBZwK5cupV5JsK6Ai3c0/9A0RmXgLsGfhPCCGEEEMmAWcCMess/PfsHwLwZMNjbCk/AVXRYGh+E21PzTiXTgghhDh6SMCZYI7NP57Ti88kRYqb6v9KpOJ0AMyb7x3nkgkhhBBHDwk4E9DKmf9FlsHJ7kAdz7vKADDteAwl4h3fggkhhBBHCQk4E1CWwcm3p18LwG973iKSXY2SCGPa/sg4l0wIIYQ4OkjAmaBOLzmLmc5ZRFIR/pFbDIB5818hlRzfggkhhBBHAQk4E5RG0XDNzP8G4Pfx3cQNdrSBZgwNL49zyYQQQoiJTwLOBDbdOZPPlp5NRKPhn04XAOZNMsu4EEIIcSgScCa4r0/7FhadhduNUVKKBkPre+i6No13sYQQQogJTQLOBJdjzOUr1V+lU6fjFVsWAJZ1t41zqYQQQoiJTQLOUeDCiosptZTxJ4cRAEPdC2h7a8e5VEIIIcTENSECTiwW45xzzuGDDz7oX9fc3MwVV1zB/PnzWb58OW+//faAY959913OOecc5s2bx+WXX05zc/ORLvYRo9fouXrmd6g1GHjdYkFBxbLudgAcdgNOx9gsDrthnN+pEEIIMTZ0412AaDTK9773PWpq9k1FoKoqK1euZOrUqTzxxBO88sorXHPNNTz//PMUFxfT1tbGypUrufbaa1m6dCmrV6/m6quv5plnnkFRlHF8N4fPcfknsiTvOO6KvMkpoRDGXU/Sd8x30TgmE2jaMSbXsE+aPibnEUIIIcbbuLbg1NbWcvHFF9PU1DRg/fvvv09zczO/+tWvqKqq4qqrrmL+/Pk88cQTADz22GPMnj2br33ta0yZMoWbb76Z1tZW1qxZMx5v44i5avo1bDVbeN9kREklsGy4Y7yLJIQQQkxI4xpw1qxZw7HHHssjjwwcoXfjxo3MnDkTi8XSv27RokVs2LChf/vixYv7t5nNZmbNmtW/PVNV2idzTtnnucuZ7mxs2vYQ9HWNc6kOH5vdhM1hHpvFbhrvtyOEEOIIGtdbVF/+8pcPuN7tdpOfn
z9gXW5uLh0dHUPaPhzDuaPVv68Co70Rpgzz2ntdMfVKLmv9FxuNXuZFo2g+vBMmXzTK0uxXriGUae8+h/tuoKIobG3sHpNzzSrPPSzlPVJ1MdFJPaRJPewjdZH2aX//42nc++AcSDgcxmAY2OHVYDAQi8WGtH04cnPtw9pfTcawWsamM67LNbxrA7iw8835V3G3/yZWdXnQrL0Hx3ErwZQ16vIoOh0ul3HI+w+37oYrkUyRnW0dk3PpdJoR1fdQHe66OFpIPaRJPewjdSHGy4QMOEajEa/XO2BdLBbDZDL1b/94mInFYjgcjmFfq7s7gKoObV9FgRynkb5QDIZ4zCfxeAIjOu6zeZ/nkdyHqen1MSUWpOu5m+mcetmoyzN7/gJ6h1AmRUn/0hpO3Y2EzW6mt7dvTM5V7DDhHaNz7e9I1cVEJ/WQJvWwj9RF2t56EEfehAw4BQUF1NYOHOfF4/H035YqKCjA4/EM2j5jxoxhX0tVGf5fvpEcM/gUIz6HXmPk6zNWcpfnv7nF3U3+7kfpm34hKePoW3GGU6YR1d1wysLYnX8sz3XA8x/mujhaSD2kST3sI3UhxsuEDDjz5s3jzjvvJBKJ9LfarF27lkWLFvVvX7t2bf/+4XCYbdu2cc0114xLecfDssJTeKr0OHb4XmJ6LIT9g9tpKR9lK8602WNTOCGEEGKcTYiB/j5uyZIlFBUVcd1111FTU8Odd97Jpk2buOiidGfaCy+8kHXr1nHnnXdSU1PDddddR2lpKccee+w4l/zIURSFb838T/6Y7QQgr+slDFH3+BZKCCGEmCAmZMDRarXcfvvtuN1uVqxYwTPPPMPq1aspLi4GoLS0lFWrVvHEE09w0UUX4fV6Wb16dcYO8ncwM5wzyZqxgvdNRrRqkpLmR8e7SEIIIcSEMGFuUe3cuXPA6/Lycu6///6D7r9s2TKWLVt2uIs14V2z4Dtct+tZjmtrJbfnHTr6ziZsrRjvYgkhhBDjakK24IihK7IWUZ59Bs9b04MiljQ/OM4lEkIIIcbfhGnBESO33LKM/5f7Dmf01eD0b8Hh24w/a854Fysj2e3GgYM8KpBKJrDbjcMaOkAFAoHoGJdOCCHEXhJwhimFlqknnzsm5xkLCuA02znZdR4Pe//MZf4AxS0PEsu/FZThNdB9unowjYwCtNQ2DFiRnW1Nj9czjIBTWl0xxiUTQgixPwk4w6QCb/zr+aGP66Cq6Hx+lFQKVaMBRUHVKJz55f9grCKF1xNinjqb1dnVnB/YgL2vEWPtv+jMlj5KQgghPp0k4IwlVcXY0YW5uRljlxtjpxuj24MmHh+06+7/vR1NYRG66iloq6agq56Cfu48NDm5I7q0RtFwhunz3JFdzw96vJR3PkiPfT5x3egH/xNCCCGONhJwxoC2L4Rjy1YcGzZj6ho8Fk1Kq0XValFSKVBVlFQKRVVJdbQT62iHt/+d3lFR0M1bgPEzp2JYdgpaV96wyjFdW8WfnYvZFnyTmbEwlR33s6t05Vi8RSGEEOKoIgFnFExt7eS88z62XbXp8EI6zARKq+hzFtNnLSZgLiOgK0CjB60xuWdJcOzx8zAFmqBpN4naXSR27iBZW0NiwzoSG9bR9//+B92cuZjOvxDjKaej6Ib2R7XCcA43urZzf1sr+f4PcAdOotc+73BWgxBCCDHhSMAZAW0wiOvVf5O1cXP/ulB+MR0lx9JiOZ6Efr8ZsJPpJRmFeHDf6pfrm9DqFAqqF1J61mkU/6cTrc9N7M3Xib7xGoktm0hs2khw00ZCf7oN04VfwPT5C9DYP3lC0TxNDpOsp3Of40mu8AeY3H4v662/JaUxjXEtCCGEEBOXBJxhUONxeu+9l4rb7kK7Zzbz7inzaCg6E5+mon8/nSWG3hJHZ06gNSfQGROkkhqSUe2eRYdezcXvidC2w0vbDi+KRqF4upMZJ59Hzhe/TLKrk+gLzxJ+8nFS7i5Cd6wm9Ne/YDrnPCxf+Q80u
a6DlvMs3VJ+l7OW00PbKU30UN71BPWFlx7u6vlU0Gq1FFeUDVyn02B25Az7PEIIIQ4fCThDpIZCeK/+Osm6WrRAqLCIXVUX06Ofmt5Bk8KSH8JWHEBvHdyp+OOWf/FzNG7vpmVrL63bevF1hWndlv6+sNrBjGXF5P3HlZgvuYzoqy8RfuRBknW1RB5/hMiz/8B88SWYL7kMcizkTCkedP4v+y7n1+FbuaPTTXHPS0Rnn0ske/onF0qeEx+SmvW7971QwJFlwe8LDesx8ZnHHeLPQgghxKhIwBkiNRQi2dKMNjeX3bOXsltzCqqqQ9ElsZf6sRT2odWnhnw+RVFwFlpwFlqYfVoJvs4QO97qoGlzNx21fjpq/bjKbcw9oxTX587B+NmziX+0htBdd5DYvpXw3+8l8tQTKN/8OjV6UPX6Aed3qAY6rTN4ztrH2X0hct+/iTfLvktKYzhomabMXTji+hFCCCEmEpmqYYg0LheOB56i/vL/o045A1XVYciKULCwA3tZYFjh5kCyCiwce9FkPvefc6g6Jg+NVsHTGOS1u3fw/qN1hP1xDMccS9af78F+0y1oyytRA366/+cPVNzxF2w7a9h/cB5FUThbcwq/z82lW6MhK9bOHPdTo60GIYQQ4qggAWeIErEkrz7cTu16L6DiKPfimtOF1pgc0+vYckws+nwFZ393LpWLXKBA0+YeXvh/m9n2RhvJhIrx5M/g/NuD2K77GbrCAgxeHyWPPknJQ4+h7+7pP5dLyWGG7liuy8slBVT636UksHZMyyuEEEJMRBJwhigeTRLyxbDlGMmb14l9kh/lMPZZMTsMHHN+JWd8ayauSTaS8RRbXm3lX6u20FHjQ9FqMS0/h0n//CfdJx6HqtFgq6un4s/34Hrt3yh7OkEv0xzLNks+dznTT1/N73wEa6zr8BV8DGk1CnarfkwWrUY6GAkhxKeJ9MEZIrPdwLnfn4uryM6Lj9cMfaqGUcoutnLK16fTvLmHjf9qpq83yr//votJc3OY/7lJOHOy8Jy6DN+8OeT/6xVsdfXkvvMejs1b6DrjVJgxjfM0Z/An5xMsjEQ5JhJlSftfebPsvz6xP86EoICvtnFszlU4d2zOI4QQ4qggLTjDYLTq0WqPfJUpisKkubl89jtzmHJ8AYoCTZt6ePH/NrPzvQ5UFeK5ObRe8gVaL15BzJmF3h+g5Il/UPrAI8zqtjNbM4sf5eXSq9GRFWuV/jhCCCEymgSco4jeqGXB8kmc9s0ZOAvNxMJJ3rx/J92b80mEdaAoBKdNoeFbV+I5+URSWi3W+kYq7ryXy97SEdLa+VFedn9/nDL/mvF+S0IIIcRhIbeoxokCOB0ju0XknJlDxTQnm95oY+0LzUR9JjrXFeKY5MdW4ge9nu5lJ+GfO5u8l17FvquWSW+t46t9Zm47y8xdziyu8vpY0PkQEa0Dt1XGZBkyRaFibsWAVVqthpzkMJ+iO5wduIQQQkjAGU+Bph2jOr5qCkxeuJin/vdNol4z/gYnoS4L2VN6MDhixLOdtH3xQqw1deT/6xWWrvPy7mQNq6sdTI9pWBbqZUn7Pbxd+h18ptIxeleZTQXeebdm3woFbHYTwUBkWAP9nX7GxOsTZLcbRz7WowKpZAK73YiqQiAQHcuiCSHEsEnAOcpl5ZnJne0m3GXBtzubRMiAe2MB1uIgjnIvGp1K35QqGirLyXnnfa58+X2+Pwn+O9/GE/VJKvFzfNuf+XfZf433WxHjTAFaahtGfHB2tpXe3j5KqyrGsFRCCDEyEnAygKKApSCEMSeCb7eTcJeNvjY7EY+ZrOpezLlhVJ2O7mUnoZ87my9seYq/L+rh8lI7z9VEcFj8HN96B4QuBw4+KWdfLEG7P0p3X4xoo4+GTj+eYAx/JE5fLEkwmiAYTRKKJ4knUyRSKsmUSiKloqqg0yjotEr6q0bBrNdiM+qwGrRYjTocRh3ZFj25VgM5FgPlRTF6IkmcRg0auaUjh
BBiGCTgZBCtPkXOtB4i+X14a3NIRvT0bMvDlBvCWdWL1pgknu2keskVzPI/yFZrB9/Oy+Vedxd2SxfJv1+Iet7DtEV01Lr7qHH30ewN0+KN0OIN0xM69Bxbh4NeAy6TlnyLjnyzlmKrjlKbjhKrnlyTBkXCjxBCiI+RgJOBTNlR8hd2EGhyEGxxEOm20Ok1YZ/kw1YcQNEonOu4gMb439ngNPDtzgUUR/LY1lLBjtveo+8TWnEcJh0uq4HibAsOg4ZciwGnWY/NqMVq0GEz6rAYtBi0CjqNBu2e1hoU0i06SZVEKkU8qRKKJ/tbfvpiSQKRON2hON19Mbr7YvSGE3T6I8RT0B5K0h4aPGq0SatQZtNR6dD3L5Ps+vQ1hRBCfGpJwMlQGq1KVqUPS16I3toc4gEjvnonzZ1WOvL6aFAd+EI/IpQw8Oren4I9nWT1apwKQ5LqqjIqXVZKnWZKnSZKs8zYTToUBVwuOx5P4LAOeJidbaG22Y27L06HP0pHIEq7L0qTN0JTT5hWf5RIUqXGF6fGt691SadRqHZZmFloY3aRjZmFNhkPQQghPmUk4GS4hDlBc7mXrR4LNSEdAQ3g29dCo5BCMbViNLfyvdzJnFD3v0wxNpPyK7Q+PhfD176HYerJ43YbyPPBayhA0Z5lAYAjvSRU6IxqaA5rqQ9raQhpaQhr6Etq2NHVx46uPp7c1AlAWfYuFpQ4WDzJyTFlTlw247i8HyGEEEeGBJwM5Itr2OI3sNlvpKZPT1LdE040oEWlJKGlPK6hNKGh3OXniYrH6FC6eL/wWD5/wt0oT1yEMctL6fSNNN3034TL52P55rcxLDrmiL8Xpyv3E7e7gFn7vVZVlc4wbPep7PCpbPOqNAahuTdMc2+YZ7akA09lroVjy7M5oTKbhaVOjDpp4xFCiEwiAWcc2axj14oQSips8Bn5qNfI7tDAAQQLjAlm2mNMs8WYbI2jRHT467OJ9JhJdmbzmcCVPDHn93zQ8QF/sU7jm19+kaynL8ZAExVnemh9Zy3+/1qJfvESLN+8GsPMmWNW7kPpaBzZxKAzgBlmuMAM4SRYTjqDN7Z18mGTl51dQeq7Q9R3h3h4XSsmnYbFk5ycWJnDyVW55NuldUcIIY52EnDGUWeLf1THJ1Iq2z2d3NPoYGvAsK+lBig3x5mTFWWOI0aB8WOdcy0Jcme5ifrS/XKcgUJOrr2EV6f+nQfr/k65qZKzLnwKx3NXoHdvZtIpPXRudNL70Qf4PlqD4bgTsHznGphUParyHylmLSybmsfcPCsA3nCcdc1e3mvo5d36HrqCMd7e3cPbu3v43au1zClycOpUF6dOcVGcdfAO10IIISYuCThHoY5Qkn/WR3i2IUJP1AOkWxyKTQkWOyMsdEZx6g89dYAxK0revE4i3WZmNMzF03oqG0te49bNvyFpvIlTPvsYrg+ux7TrSQrn92KdOYXWZ0LE3n+XxvffRb9gEebLv4p+0TFH1aPaTrOeU6fmcerUPFRVpdbTxzu7e3hrdw+b2vxsbk8v/+/N3cwosHHqFBenTs1jUrZ5vIsuhBBiiCTgHCVUVWVNZ5wnd4d5vyPG3viSZzMyx9TL4uwIxabBj1EfiqKA2RXGlBvmuqrvc936TuqtW7k98Fu6V3+fWfO/x6KFM8he/1vshhomf30GnW0LCL7wDvH1a4mvX4tuxkzMF1+C4TOnoeiOrh8pRVGYkmdjSp6NK46dRFcgyhu1Hl6r8bC+xcf2ziDbO4OsfruBapeV5XOLyA7FKbTox7voYhhGNQ3FflRkGgohjhZH16fRp1A8pfJqc5SHakLs9u8LMAvz9Jw/2cwXzj6Ofz32wKivoygwdUkxfyz7PVe9+TU6Te28UHEPmveupobFzK2+heMiv8Lg3U6po4HUH75P0xs+Iv/8J4nt2wj88qdo/rQK04ovYDr3fDSOrFGXaTzk241cvKCEixeU0BOK8UZtN6/v8vBhUy+1nj7+77VaA
MrtepbkW1hcYMY2zmUWhzaqaSj2U1pdMepzCCGODAk4E1QonuKZhgiP1oRxR9LtNWadwjnlJs6fbGKSPf1Hp9eO7dM/DmMWNx//e1a++3XasmpZO+8fHLPhAjbWTGa35hbOzF9NYWIT2ndupKTyJHx/u4PQy+8RfvpxUl1dhO5YTeivf8F42pmYzluBbvqMo+r21f5yLAZWzC1ixdwifOE4/67r5s36Xt7a5aYxEKcx4OOJOh9z8i0syjUyL9eEYYz/PIQQQoyMBJwJJpxQeWp3mAd3hfDF0qPo5Rg1fKHazHmVJuyGw/8BWmGv5Pr5v+Bna3/MOvObTL6glCXNZ9O4QeGJjp8z1/I8x9nvw9DyNjkdKzCe+BPMlzxF9PVXCT/6EMnaGqLPPUP0uWfQTpmK6dzzMZ7xWTS2o7etI8us59zZhVy2tIonn1vPWneYDzpDNAbibOwKsbErhFGrsMBl5tgCM1OdxkPOn5VKJkjG4yQTcVKJ9PfqntEWFdKjP4OCVq9Hpzeg1evR6PRHbWAUQogjSQLOOMotsvZ/H0mkeHRrgLvW+ugOp1tsyrN0fH1hFudOs2HQHtkPtRMLlvLfs3/AH7bcwuMdD5A3J4fPn/EFGjZ4qF/7BRo9CzgtaxVF7MTx5nUo791N7JifkPWX+0hu2UTkmXTgSdbsou8Pt9C3+v9hPPkzGM9anu6UfJT11dmfw6DllBIbp5TY6AjH2dAb5+1mP92RJO93hni/M4RDm2KBMcAcunBGu0mEg6SiYZKxCMlYhJ0PREnGRza3l85gRG+2YOhfrJjsDswOZ3rJcu773uFEb5LO0UKIT5+j91MmA2yrryWlwputCg/t1NATTYeYAovKF6pTLC1OoNVEqG3qPOg55hdPP2zlO2fS+fhjfu7edQd/2rEKm97O5044h2knFJAMzmT9K3Oo3/V3Fpofxh6rw/7OV2l+6xhaJ3+X7K/+kOxr/pvYyy8S+cdTJBvrib78L6Iv/wslJxfTmZ/FcNoZ6KYdfbewktEwUZ+bqLcLfG7m9PVQ7e2mMWZgh3kyNdYq/Jh4M5TFm2RRGLEzI7iTKX2NGFOxA55T0WjR6nQomnQLnbpnDgw1lSKZiLP/nBiJWJRELErY1zuk8uoMRswOJ9acXGy5Bdhd+dhce77m5mO02o66PwMhhDgUCTjjaEu3wt+2a6j3pz9cXCaVi6ak+EyJyvgMrKtgcwz81/43FnyTsNLHAzvv43+2/Ja8LBenlJ6CLkfDKZfPJh67mcYNX8fw3h+Y1PckZZoPKam/lJrtS3lXvQhT9bEUXXcmuYlWkq+9SPTVl1B7ugk//ADhhx9AU1iEYdkpGJedim7W7P4P+IlAVVX6ejz0tDTQ3VxPT3MD3rZG+np7Drh/EVAUauWUyGaaHFPYaqygTsmhw1RIh6mQt/JOZrYtxbG5Oq48dwGxOGj1hgHB5mDlSCWTJOMxkvEYiWiUWCRELNRHPBImGgoS8fsIB3yEfb2E/T7Cfi9hv5dENEIiFiXg6STg6QS2DTq/3mTG5sonu7AIrd6MLa8Auyu96I0yDpAQ4ugkAWccuKNarrpvHa/u0AJg0alcWJ1iebmKXju+Zdva0D1o3WnOy2nKc/OW+0Wuf+dHXDPt55wy+TR6e/vSDQv5uXDeTfi7v0rhB7+jwPcm08xvMo03aaxfwPotF/BeYg45JWeTd81FFAR2YNr4JvEP3iXV0U7kkQeJPPIgGlcehpM/g2HZKejnzj/i7z0WDtHdtBt3fQ2ehlrcDTVEg4ED7quzODA68zA683EUFJHSOzDYstFbs1C0OmYCnwV80SRrukK81xGiPZRgQ0DLhoDK03/dymdn5HPOzAIqcg0HvMZeiqKg1enQ6nRgtgzrPcWjkXTY8Xnp63ET6O4i6Oki4Oki2N1FyNtDPBKmt6WR3pbGQceb7A7srkLsrnzsrgJsrgIceQUYrfZhlUMIIY40CThHUCwFr7gtvOq2kFS70CgqZ
01S+cKUFI5P/owbV4qi8B+T/5twso+Pet5i1c6fozEmmGs5acB+8dxqmpffRZaxCfXf/4tp9/OUG9dTblyPO17J9u7TqGlZyg7VhUZ3EdnnfZHiWA3O5rXot60h5XETefIxIk8+hpLlJH7aqRjREZtUjqof5bgzyuDWqVg4RPuubbRu20Lr9s14GutR1YEDJGq0OnJKJ+EqrySvoorCyio21QTQGva0bChgs5sIBiL9s7HvL8uo5YwyO6eX2mgKxnm/I8SHXSE6A1H+tqaZv61pZnaRnXNmFXDGtDwcprEdX0dvNKHPK8SRVwgMvp2ZiMX6g0880E3Lzp0EPZ0E3J1Egn4igfTirt814Dij1U5WQTGOgmKy8otwFBaT5Zg8pmUXQojRUFRVPcCv5U8PjyfAUGtAUcCZbeG5R+4b8jF7bQ/oeaLNjieWbqJZOsXFRUUdlIzywaL5J57Dsw/fN7qT7HH2ly5nY637oNsTqQT37P4973leQUHhK5XXcGrBeYP2m1WRS9AfRuNrxLLxTkzbH0FJRABIoaM5eQxb/ctojC4kRfoDXUnFyQvWUOTfjLNlHdpIsP98qk5HtKycyOQqIpOrSDqzh/3elv3Hpazb1oyvqRZv/Q689TsItDdCamCgMTld2Msm4yidjKO0CltRGRrdvtAxryqPXU0D60ij0ZBKHXrk6L3iyRTNoRSPftDEu/U9JPf8LBm0CsuqXZwzq4Bjy7PRao5svxiH3ThgrJhYJJwOO/1LFwFPJ6HeHg6U5jRaHfa8AgqrpmDPLyVnUiU5pRUYhtnqdDh8/L2NVGl1Bf5PGOhPUcDlsg/r90qmkrpI21sP4siTFpzDzB9XeLLdxgZf+l/8WbokFxQHueE/zmLju8+Nc+mGR6fR8fWqH2LRWXm14x/cV7+KYDzAuSWXHrCTaiqrnODJN9G35PsYdz2Facfj6N2bKNe+R3n2eyS1FjymJTREjmF712y6NDPpcsxEKb4Ip6+GPPcGXN1bMEV7MdXXYaqvg1ch4nDRVzaVcHk10Uml6EwaNNoUH+/GoqoqYX8Pfk87j/7ih3TW1aCmBo72bMrJx1kxDWfldJyV0zA6Dh2emt7ZuO+FAhargVBf7IAtOAfz2UtO47hiB919MV7c3sU/t3ZQ5wnx8k43L+90k2cz8LkZBZwzq4DK3CMTEDRaLYUVZR9bO3XQfvFImJ7WZrpbGulubqS7uYHulibikTC+jlZ8Ha37dlYUHPlF5E6qJLdsMrmTJpNTViF9e4QQh50EnMNEVeEjr5Gn2m2Ekho0qCzNDfO5ghAmrXrUPrWiUTR8peIaXLZcHqm9h6da/kow4eeL5VehVdKtU1rtx24HOcyQv5LESStJurej3fwIuq2PoQ12UND3BgW8wZJchVjeQvz2xXSqc2gKLKCjcwG7uvqwhjrI7d5Kbs9Wsnx1mPweTFs9sPVdEhoDvdnT6c6dRY9rBnFLFhAjlYyQSoRJpRygWoAEWtMktAY9xiwHJmc25lwXBosFRaegqgq+RlC0XjRaBUWrpL/qNGj1GrSG9BKLJFDV9L/KxkKu1cCli0v58qISdnYFeXZrJy9u78IdjPH3D5v5+4fNzCpM38I6c/rY38Lan6rAh5vqhri3FrInY8qeTMlcKEbFmArTVV+Py5igraaG7uZ6Qr3d+Dvb8He2Uf/hO+lDFYWsgmJyJ03GVV6Fq6KanNJyNFr5dSSEGDvyG+Uw8MY1PNZqY2sgPQlmqSnOl0oDlJqHP1fUUBiNR7ZnsqIofHnqN9AmTDzYcDsvdzxJa7iBb1ffgE2fnqLhQJ2V0/Jh8rVQuRJLzxacra/jbH0Ni3c7Rvda8txryQNmKjqYNJ/GLCcezVS61Gm0xRbTHEhgaavD0bYTZ+cODFE/ed2byOveBLsgaC2mO2cW3bmz8Dkmoxyg13bMn178TUEgOGj7J1nPLlC0aHWg1YNWr2KwpFC0GnRGFZ2R9FcTg1qUDlWn0wvsTC+w8
52TJ/N2fQ/Pbe3knd3dbO0IsLUjwB/eqGNZlYtzZqdvYemO8C2sT6IoCubsXJxaC0uOmYGvNwRAOOCjp6me7ubddDfV0920m5C3p7+lZ/eatwDQ6vXpwFMxhbyKavIqp2Bx5oznWxJCHOUk4IwhVYUPvUaebLMRSWnQKiqfzQ9xal6IwzlOX6DXe/hO/gnOLFqBU5/L3XW3ss23jl9tWck1U3/JXFyHPljREMqdSyh3Lm1z/xN9XztZHW9j61qDvetDjKE2aP2ISqCSV1BRCOhL6c2ppie/gubqMuo8NmhqIKfHS74/jDMUxtbXhq2vjfLml0nqjfQVVxMonUru8i/QHNaTSqh7lhRqUkVNqaSS6XVqSkVNpkglVdTkvvWpWJJkPEUylkrfhlIVknFIxgEUwj6Aj6cZFb0J9BYVgwUMFhWjLR189rb+OOzGg1bP+dklnL+wBE8wyrObO3hifRu7uoK8ssvNK7vSt7A+P7eIFfOLqcq3TdgJIM32LEpmzadk1vz+dWG/l+7merob6/A01OFuqCEW6qOrbidddTv797Nk56bDzuSpFFTPILukHM0EGkZACDGxSSfjEXQyfvnpBwf1twgmFB5osLLBm34cqtyS4PLKPooP0mpzxgWX0hf0jqLkaRabk6fuWT3q8wBc8LWVn9jJeC9Fgexsa/9j4s2h3aza+XPc0XYMGhO/PP6XFCYXjaoshr5WZrKDzlf/TnakBkvSM2gfT8RCS9hBaySHHuNUzLZi8oIRbM3NGBt2ow2HB+yfnDyF5MJjSS4+ltTUGTDMWyKqqjJ7Ui6vPPj6noCjkEyAFh2hQJJEBBJRhXgU1OSBE61Gp2K0q8w8vhJF14ezyIBOf+gPbVVVqe2N8tLuAK/WB/BF9/1czSl2cOa0PE6f6sJlO3hoOpSsbAvvrtk+soMVyHKY8fnDnLBfC85QqKkUfncHnvoa3A21uOtr8LY18fFfTXqTOR12qqaTXz0d16QqtEN8uk46GR95Uhdp0sl4/EgLzggEer0DAs6ukJlHuvIIJHVoUTkjp5dlTi/aCAQiBz6HCmzY/uGoy3LCMWeM+hyjVWaZzM/mrOaOmpvY6lvLde/8iFMLPs/Fk76BUTuyaQJC2hxqQpN4v30qAY8dY9JLqcXXv+Qaw7hMIVymEPPpALYR0JfSnTudnmnT6NJ/Fm2nB9PudOdkQ0c72t01aHfXwOP3o9rsJOcvJrnoOBILj4Eh3A5RFAWdQbvnNhSAuqeTsUKoL9X/M6Gq6dadWEghHkp/jfUpxPoglVAI9yqsfT495oyiAWeBgZxSI7mlJpyFBjQHaO5TFIUpOSam5Jj45gIXH7T18a86Px+09rG5zc/mNj9/eL2ORWVZnDE9n1OnuHCaD19/nbGkaDRkFRSTVVBM1XHLgPT4Pd2Nu3HX76Jrd7plJx4J07ZtI23b0p28NTo9eRVV5FfPoKBqOnmVU2RaCiFEPwk4oxBPKbzQk8PbvnS/k3x9jC8VdFFqPPBw/B/XW9c++kIcM/pTjAWbzsF3p/+GJ5rv4fm2R3it8xm2+NbyjaofUW2fecjjVVWlr6OZnpot9NRsxt9cN+CJp6TGQpuxkkB2Ea2uIqyGFM7obrKjtWRHd2GPt2KPt2CPt1AReIWkYqDbNAP3gjl4jv8CS867iJ1PvYh27fto13+IEgyge/t1dG+/jhFIVk/b17pTPR20I+/XpCigM4DOoIIT9iYfNQXRPogGFbJzi2jZ0U0kkKS3PUZve4y6DwNo9Qp55SbyK024ys0YTINbd/RahZPKbJxUZqM3kmB90MAzG9vZ3O7no2YfHzX7uOXVWo4td3LmtHyWVediMx5df9X1RhOFU2dSODX9s5NKpfC2NdFZu4Ouuh101m4nEvDTWbuDztodbCYdlHLKKimomtYfeozWo3eCVyHE6Bxdv/UmkK6Yngc682mPpW8JHO/wcXZuDwbNp7ctVqNo+cKkb3D29M/wk7d/Slekld9s/
S+WF3+R80svR6cZ2KIQD/fRW7eNnprN9NZsIRb0DdieXVSCVm/B4SrGmp2HZr/QEQfclnm4LfMAMCT9ZEd2kRvdQU5kB6akl/zwRvLDG1FRUJ97huzSM+k94RqiOgeaXTvQrvsA7doP0NbtQlu7E23tTnj076h2B8kFS0gsWkJywRLIco5J/SgaMNnBZFc59ZIZNG6vJexP0t0SpaclQndLlFg4RUdtmI7aMIrSi7PIQH6lmfxKE1bn4BaZbJOOy2dP4vxZBbT5Iryy081LO93s7Arybn0v79b3YtAqnFCZw+lT8zhxcs5RF3YgPdZQTmkFOaUVzPjMZ1FVlUBXB5112/tDT7DbTXdjHd2NdWx77XlQFLJLJlE4ZSaT585DNdgxSAuPEJ8aR99vugngI7+Np90uYqoGqybJxfldzLCGD33gp8RxRcfz67l380DDbbzreYXn2h5io/d9vlJ+LcVBBz01m+mp2YK/pW7AJJIavYHsyTPInjKbnOo5HHfMTN782wNDumZM66DTuphO62JQVezxFvLCm8gLb8Ieb0VpepfypncpW/tr/EUn0VN+Lt4vXkL80itRervRrluTDjsbPkQJ+NH9+xV0/34FVVFITZ1J8pjjSRxzPOrkIXSgHiJFUbBk6bBk6SibZUVVVfxdcbrqw3TVRwh0x+lti9HbFmPnOz6s2TryK03kV5pxFhoGDTVQnGXi8iVlXL6kjIaeEC/vcPPSzi4aesK8UdvNG7Xd6DQKS8qdfKbaxclVueRaJ/AQ2p9AURQcBUU4CoqYcsKpAPT1dg9o4fF1tPZPQbH99RdAUXAWluAqr8ZVUU3upMkyHo8QGUw6GQ+jA1w4nuQP/67n6Q1tAFSbw3wpvwuHbviPf1/wtZU89/Dfh33cx539pcvHtJNxfYd3SPvqdFoSiQO/74oCJ5vq0p2V32t5gftb/kSIdKfTya1WFu1wYo2ms7Ulv5ic6tnkTJlDVvmUQaMGDzXgfBJTopvjZypE1j2CtXdr//qkzkp3xXm4qy8mnL3nNloygWbHtj2tO++jrR84LoyuoJCekirC1bOIVExBNRhGNNDf6ZecRtOOTx5zJuRP4K6P0NUQpqc1yv6zSBgtGvInmymoMjP35KkEQ/EDnkNVVWrcfby8083rNR4ae/cFcQWYV+LgM9UuPjMll5kVuePSyfhwCfu9dNRsp3PXVrp278Db3jpgu6JocBaV4qqoxlVeRe6kyegMn9xJWzoZD53URZp0Mh4/EnCG+JcvEk/yHw+sZ3d3CAWVM3N6OcXpZaRDkUzUgLN7x+ZD7qeQHswvmVQHfabHQn1oY0k2vvcBvfU7CHs6iBiSrJvqZVdZEBTQp7ScqlvG2dVfw55TeNDrjFXAgfRUDRvr3Jj8deQ0PEtO4zOYgs3924O5c3FXfYne8uWkdPtGDlY8XemWnQ/fQ7txLUps34dbSqcnUjGFxOx5+MqmkRzCKMh7DSXg7C8eTeFpitBVH8bdECER21fzRouOSbNzqJznoniq8xOfyqpzB3l5u5uXd3Sxpc0/YNv0QjsLSx0snuRkSp5t2FNF7P2ZmFKSjbe3b1jHHm5Op4Xejg7adm6ldcdW2nZuwd/VOWAfjVZLXkUVxdNnUTJ9NgVV09AbBwYeg9GA13vw8DacD3W73chYjB6hwoQcJkACTpoEnPEjAWeIf/ncwSjn3b2GHKuBFfZ6Kk0HeTxqiCZqwKndum5I+2o0CqmUSjwaobepie7Gerob6vF3dgzcUVGwF5XjnDyDvoos/hF9hrq+dCtBrqGAc0u+zIl5Z6HTDL5bOq86j50b1oz6fQFMm7+Euvb9+vioKSzt7+Pc/gD2xpdQUukWkKTejn/KBXinf5lozsDJKdVolKLGHez8ywNYarai8/cO2B7LLyE0ZRbh6plEi8s/caS/0798OrHwyD6UkokU7bU+Gjf30LSlh2hfon+bVq+QV2GisMqMa5IJneHgZejsi/NOcx9vNwfZ3
BUmtd/fA6tOw8wcI7NyTMzMNmI3HKLT9X6Tjp5++twJF3AO9Ah8LOAl0LKbQEsdgZbdxD7256lotFgLy7CXTsZeWoW1aBInnTD3E1unhvOhfqQeXR8vEnDSJOCMH+mDM0R5NiP//OaxlBXYee7+7cO6HXE0aaz/5FsU8XCYkNtDrLcHf1sHEa+Pj//2yi0tx1RajbNyBlkVU9Gbrf3bZqmn8b7nVR5rupvuWCd/rf8jz7Y9xLkll3KC64xBQWfbutE/Sg/pgNPR1PqxteUw5Xr0k75NYdtzFLf8A3O4jextfyd729/xZc2ivfQ8ugpPJ6VN/0u++uST6WmN0XPWRejd7Vhqt2LdvQ19Uz2GrlYMXa0433mJpMVKuGpmOvBUTkf9eOdWdTjTIhyYYRpUTclmck4xm96oo7MuTLQvSUdNmI6aMBotuCaZKKgyk19hRv+xJ7IKrHpWTHeyYroTXyTJzqSVR9+qYXtvlL5Eig+7wnzYFUYBJtn1zMo2MSvHSIXDgOYonWpkfwa7k9wZC8mdsRCAqL+HQPNuAq27CTTXEQ/6CLY1EGxroH3NayhaHV2vTcdVOY3CqbNwlVcPeRweIcSRJwFnGFxWA8YDDP2fqVRVJeYPEHK7Cbk99LndxIOD/2Wut1mxFRRgLSjAWpDPWRddyca6Aw8YqFE0nJB3BotzT+aNzud4vu1hPNEO7t39Pzzb+gCnF65gad5ZmHXWAx5/OMSN2TRXfoXmii+T3fMRxc3/INf9Flm+rWT5tlK16zbai8+mrex8UGZSetysPUfOBs5Ao9WQ7OlFWfchyofvo6z7EG1fH7bNH2Lb/CGqVos6aw7qMcehHnMclJQyJvcmAEWjUDzVSSLlZMbSLHydMTp3h+msCxPyJemqj9BVH0HR9JJTbCSvwkReuQlr9sAP5iyTlouml+Ls6SGZUtntj7G1J8LW3igtwTiNgfTyfFMAi05hSpaRqc70UmzVZUTgMTpyMM7KwTVrcfpn39fT37oTaKkj3hegdfsWWrdvYePzT6DV68mrnJp+nH3KLHLLq9Dp5VeqEBOF/G0U/SLBAMG2dkLdPYS7uwl395CMDm76NjmdOIoLMGTnYMlzobcMf7Zrg8bImUUrWJa/nDe6nuX5todxRzt4qPF2nmr5KyflnUV2/lfH4m31KyjJP/ROpefgmXsOvSE3ztoncO58GEOwlUmNDzGp8SFS7afhDk6j1TwbVdGAAiazgUg4BkYDnHQyyvEnYmtpJquuBmdtDeZuD8qmDbBpA/zlDiLZObjPPQdDfgWxqdNBNzatAIqi4Cw04iw0MvX4LILdcTrq0mEn2JOguyVKd0uUHW/7sGTpyCs3kTvJSE6xccCtLK1GYYrTyBSnkfMBXzTJ1t4IW3uibO+JEEqobOyOsLE7fZt2b+CZXWil3KQhlVI/cRqK4Riv/iWKomB05mJ05uKavQRVVYl6PeTrQtRv2EBHzTYiAR8du7bSsWsr8Bg6g5G8yVOpmr8AR2k1OWWVMoGoEONI+uCMYKqGJ++9fdS3qMa7D46aShEL+In5fMT86SURGty3QNFqsLpc2ArysRbkY8t3oTMYULRKei6nA5z7pDMupq4jMKzyRJMR3mx/keebH6e1ryF9bRQmp0qYm5rKtFQF+lHk8Qu+tpI1r70y/APVJNm9Gyjs+BfZvRtQ9rzjmKWI7ilfoKfqAlIWF6nUJ/xAtLWh+WgNmo/WoGzZhJLY12cmZTITmz2P6LxFROcsQHVkDbuIJy6ZccgOy33eOO6GCO7GyKAnsvaOpjx5QTFt/m6MTt0BR1MGSKoqTYE4u7xRdnmj1PljRJMD33uWWc+sXAMzXSamu0xMyzVhGcJ0FAcyVv1LnNlWalp7D73jIeztQK2qKr7ONjp2baWzZhsdNduIBgf+zOuMJvKrplE4ZSaFU2eRU1oxYCwn6YPz6SB9cMaP/PPiU0BVVRLh0L4w4/MRC/gH9Z0B0FmsmLJzMGbnYHLmY
Mxyouz3SzkYSKEoEcwWPeFQ/KC/uNSDPEJ+MAb0nFF4LqcXnMOm3o94vuVx1ve8T52mhTpNC0bVwMzUZOalplKqFqCM1T2eQ1G09OYsojdnEaZwB/ONu0i8fzfGUDtFG/+P/I2rabfPoda4gC5DZTotHEiWBk47Ds3Shdgam5mtteB99TW0fh+mj97H9NH7qIpCvLKa2LxFROctJFFWsW9mzlGyOvVY5+upmG8nEUvR3RzF3RihuyVC2J8eTXlte0N6Zw0YnTpMOTpMuTpM2Tq0xvT70ioKlQ4DlQ4DZ02yk0ypNAXj7PJF2R2Is7Mngi8c592WOO+2pG9nKkB5loHpLhMz9iwVWYZhP6U1Wo3bmg+90yFMKUk/KafsGVPHWVjC9JPPRE2l8Ha00lmzlZ6GXTRv20y0Lzhgagm9yUxe5ZQ9y1RMc2Z90qWEEKMkAScDJWOxfUFmT+tMKj54nBSNXo/BkZVesrK44Nvf5bXnnh6TMnRuqB3xsYVk8zW+Qdl5N/D7J3/GZk0NPiXIeu0O1mt34FCtTEtVMD1VySS1EM2gmbwPLKcgb8RlSssjNesrPNuYQ2l4C9XBD8iNt1Aa2EBpYAN9WicNlgU0WBbQpzvw3FYpgwH/lCoKvraS2rO3oGvcjXHjWowb16JvrMewuwbD7hpsTz1MMjuH6NyFxGbPJzZjNqplbPol6QwaCqrSY+gAhHyJdNDx6WnZ2UusL0G0J7349vwxGh16HEVm7IVm7IUmrC4TeosWRVGoBJaRHm04nkgS0+l5eU0t2z0RdnRH6OpL0OCL0eCL8WJd+tF0k1ZhSq6R6mwTVdkGJmcbqcgyYNQdnbOFKxoN2cVl5JSU4XJ9AXeXj57WJjp2pVt3Omu3Ew+HaNu+ibbtm/YcpODIKySnrJKcknJyyiqwZrsGDeAohBgZCThHuWQiQczvI+rzEfN5ifl8JMIHeIxVUTDYHRiy0mHG4MhCZ7YM+GVqtjuOYMkPbZJ9Eqckj+EzycU0Ku1s1Oxku6Yev9LHh9qtfKjdikU1MSU1iSq1jMmpEswcfGTa1jGY+6t61jySip5GywIaLQvIjrcyJbKO4sBGrEkvswKvMyvwOl2GCuotC2kxzyapOchowRoNicpqEpXV9J3/RTS93Rg3rcewaR3GrZvQ9vZgefMVLG+mR1SOT55CbNY8YrPnEa+sHtV8WftLj6ZsY9L0Kl5+8FUSEYj4lfQSUIiHFaL+OG5/HPfOfWPnaHQqBgvoLSoGi4o9R0dKE+esy04le+a+MYG6Qwl2dKfDzg5PhJ3dUfriKTZ3RdjctW+4BY0CJXY9k51GJmcbmZxt4OSCKAW2waM2T3TKflNLzDx1OalUit7WRty7a3DX78JdX0Owuwt/Vzv+rnYa1r4LgMFiI7tkEtnFZWQXT8JZXIbRIvNpCTESEnCOMolImEhPD9HebiK9Pfz5xWdIxAZP7qmzWDA4nBiyHOmvdjvKJ4zLMtYK5kwas3MpKFSoxVQki1meXEq90soOTT27NI2ElAgbtbvYyC4UVaFYzWOyWkplqoQSNQ/dYf4R7zWUsCmrgrX2z1Ic2kZlaB0F0d3kxxrIjzWw0PcsLebZ1FsW4DGUH/wWFpDKziW87HTCy06HeAzDjq3pwLNtE7r2Vgx1uzDU7YJnHiNlNhObPjsddgrH7okzRQG9GfRmFXtB+v5jKpGeIDQa3PO1TyERSc+MHvGnwxBA924V0PH3re9iydJgduiwOHSYHVpmOPQsqjZhnKdFBZr9cXZ4Iuz2RtndG6WuN4YvmqTZH6fZH+fNpmC6QG+0Y9YplDkMexY9k7LS35c69Bi0Q/uZnjS9eszqaCQ0Gg25ZZXkllUyfdmZAGiTfWx96216WhroaWnA295MLBSks2YbnTXb+o+1OHNwFpX1B5+swlKZYkKIIZCAM4GpySRRn5fInjAT7e05YOuMotNhzMraE2jSLTRa/fjOMdT90QejPkd1+bmD1unRMVUtZ
2qynFQyRaPSTq2miTqlBbeml1ali1a6eEu7Dq2qoVjNZ5JaSH7bfPpSfVg1h+fx86Sip9kyj2bLPMwJLxXhDVT0rcee7KYytI7K0Dr6tFm0mGfTbJ59wP5PA9+ogdicBcTmLABA0+3GsHUTxq0bMWzbjKYviGn9h5jWf0jjfXdDXj6aWbNRZs9BM2sOSt5ob8fto9GB2alidsLe3vWpJMTDEAulW3hiIUiENcTCKtG+BNE+6G0bHLwVDZjt6dBT6tBRbTdiLLFgnKIhYoC2eJLGYJy63nTwaQ7ECSdUdvVE2dUzsCOtAhTadHvCjoFSu54Sh55Su4E8i+6I9/EZLqszh+IZcymeMRdIt8b6OlrobWuit60Zb1szwe4uQt4eQt4e2rZv3HOkgi03j6zCErIKikmGF2LKKcbscI7bexFiIpKAM4GkEgkiPR7C3R4i3W4i3l5IpQbtZ3BkYcrOxZidw1mXfo03Xnz6qGvCHwsaNFSqJVQmSzgD8BNkt6aFOqWFJk07QSVMs9JBMx2889rVAOQb8phsrmCyuZzJ5komWyqwaof/mPsnCeucbLd/hu22ZbhijVSE1lMW3oI16WNa8B2mBd9BXfU8k50n4S48jYBj+iE7E6dy84icfBqRk0+DVBJdYz3GLRsxbNuUbtVxd5F64zV44zWSgLakBNOixZgWL8a4+Bh0+UN4RH4YHbc1WjDawGhTARUUsFh1BP0xFp68hNp1DYQDCcL+BCF/krA/QSSQRE2l+/yEfAm6OfCTP9l6hZOsWk6zWciqdNARCdKjVfGoSToTSdrDCZqDcfriKdqDCdqDCda0DQz+eo1CkV2fDj12PXMDBrp7o+RbdGQZNBNy3B6tTtd/W2uvWCSMr72Z3rZmetua8LY1E/Z7CXZ3EezuonXrera99hwAZoeT7NJyckrKyS6tIKe0HHteIZoj2HK7l91uRFEglUxgtxtH/NTpRJ2GQhwdJOCMo2Q8TqTHQ8TjJtzjIertHfQve43BiCknB1N2bvrpJmf2gAkpc0pKP5Xh5kAc2Jifms58pqMmVXrx06Rpp0npoNcZoSnQRFfMTVfMzfu+fSMku1QnZWohZWohReRRqLrIwnbAJ7WmzF049AIpCh5jBR5jBeuc51AYqaUsvJniyA70vmYm+dJj64RNRbgLT8VdcMqQwg4a7b6+O+deyPGzy1n78D8w7NiKfsdW9A11JFtb6Wttpe+ZfwCQyC8kPm0mserpxKdOJ5lfOOg6Jy6ZOfT3dvCi4SqzE+obHBpTKZVIMB12wntDTzBJpC9JtC9JJJgkEVNJxlX6vAn6vAl6WtIDRpqBsj0LaEAxELdoCFgUvHqVXq1Kt5rCnUjSGU0QT6k0+WI0+dKtSI9t9/aXQ69RyDdryTPryN9/sehw6DVH7O+TRqulsKLs0DtOnzrgZcjnxdNUT3dzA56mBnpaGuntaCPs9xLe5u1/agtAZzDiLJ5ETmk5OaXlZBdPIquoFIN5bEP9xylAS20D2dlWenv7RhxwSqsrxrJY4lNGAs4RlEomiXl7ifR089D1/427YfDYJTqzBVNuHmaXC3NOHjqr9agMMEbj+P5oKSjkkEVOKov5TOeC81by4l/voWVPi06z0kGz0km34sWzZ1nPjv7jzaqRQlwUqnsWXBSpLkY6bFRK0dNmnkGbeQZaNc55J1bT99Ej2JtfwxxpZ1LDA0xqeICYrZTApDMIlJ9JKH9R+v7QIWgs1vSTVrPnp997OIy+ZgeGHVsw7NiKrnE3uq4OdF0dmN96LV0eu4NY9TTiU2YQnzKNeHnliN7XcGg0CpY9/XIOJhFLpcPOnsBjNmfR0eDeb12KaCgJqoK+TyWnT2Xf82oaQEMKHX6NSq9GxW8AnxECBoWOeBxvKkU8pdLal6B1vzm89jJqlf7Akw5A2v7XthGO5XMwqjKa6TocUDoXZ+lcli+ZgaejB29bMz0tDfS2NNLT2khvaxOJWBRPQw2ehpoBR1uyc
3EWleIsKtvztZSswhLp2yMyigScw0hVVeLBIJGebqLd6RYadc8tp71DgumtNky5Lsy5eZhyXejH6FHg8ebz9Ix3EQYxY2SKWs4Uyvv/RdlHmBalsz/0dCgePPQSVqLU00q9MnD+qj88/gAOnZk8NZt8NZs8silV89EO469SUtGjTj+Hd5smocm9lnzv+xT2vE6+9z0MwRZyt91L7rZ7iemy6Mo+gc7spXiyFpPSHHh04L1js+ylms3E5i4gNjfdf0cJ9aHftQNDzXb0tTvR19ehCfj7+/AAqHo9LfPm4TQ4iRVNIlpcTnIc+nToDBp0Bk3/VBLl08po3DkwiKRSKrFQKt3yE0z2twJFgvtagnR9SZwJFRJA/90rI0nU/vDTq1Hp1abo1ap4tSo+RSWaVGkOxmkODh5WwaxTmFLXR5HdQGWuhcm5VqpcVkqyTOPe30dvNPWPsbNXKpUi0NVOz97A09KAt70l3aent5tQb/eA1h6UdN8eZ2EpzuKy/vCTVVA07n36hBgJCThjLBmNEunpJtLdTaTHQ+pjTzhpjUZMOS5OWvEldtTsQPfxSRjFEWXFzDS1gmlU9IeeOAm66KFD8aQX0l+78eKNevFqvDSx3yPnSbDoTLhUJ7mqE9eeJVd14sT+iYMSprQmOnI/Q0fuZ9AkI7h8H1LQ+xb5ve9iSPgodb9AqfsFEhoTnqwldGafhDv7eOK6oT/Sr1qsxOYvIjZ/0Z43GEffuBt9zY50S0/tTjTBAJGPPsK533EJWxax4klEi9OBJ1ZURsp0eG9tDIVGo2CyaTHZtFBw4H1UVSURVfuDj9Wey+Z1TSTCKWyRFAXhFIlwilR4X4tcAhXf/sFnz/denYpfUQknVDa1+tj0sWsZdRrKs81UuaxUuSzMr3SRZ1AodJjGta+PRqNJd0QuLKFy8Qn962OhPrztLXuW5j1LC5GAn6Cni6Cni5Yt6/r3VzQabLn5ZBUUk1VYjKOgmKyCdAdno1UeYRcTlwScUVJVlXggQNjjJuJxE/P7BmxXNFqM2dmYcnMx5bj6bzlNO3EZtc2N41Rq8Un06CghnxI1f0DfgRhxSs89lkf/eQddSi8epRe30otXCRBSIjQpHTTRMeBcOlXbH3pyVScu0t9Hk4M7Tqa0JrpyltKVsxRFTZDt30RB71sU9L6NOdZFYe+/Kez9Nyoaem2z8DiPxe08FphDfknR8N5kxSRY9pn096qK2txEflstXe+8DzW7oLEeXdCHbtdmLLs29x+mlpRC9TSYXA2Tq6CyCvZML6HRKqSS6lgNvjwqiqKgNynoTRrsuXomTS9EUzr4110iliTiixPxxoj44oR9MSLeOBFfjLAvTiqebnGNo+LVpPv69GpSdGtVPFqVHm2KaCLFLncfu9x7JqJ9qwEAs15DZa6VybkWJudaqHJZqXZZydszro+CMvw/twO912GO6m2wWMmvmkZ+1bQB6yMB/36hZ9/XWKiPgLuDgLtjQPABMNkcewJPenEUpr/arCWjfl9CjJYEnBFIJZNEu7v7Q83HJ6TU2+2Ycl2YcnIxOrOP6PgzE0Xh/CmH3ukoY0DPjNwZzE3t1+lTAcWk0hbp3tOXpzfdrwcvPYqPhJKkU+mmk+4B57rroSfJ1uSRrysiX1tMnq6YAm0xeboirBo7qqKjJ2shPVkL2V7+HRyhGgp63qKg9y3s4XpygpvJCW5masvdqA0/wWFeiDvrWDxZi0noRjLvjYYpF17EhmAU5i1AicXQd3ViaG9D39GOob0Nnc+L0toCrS3w5qv9RybsduL5BVBcTCjXRfyqb1A0qWRMfu6VTxg3aLia3tl4yH2MJjAWQlZhur9/KpF+HD4eVnCFFbJz8+lp7yPYEyWVVEntafXxaFS6tSk8+wWfcDzFto4A2z42L5vDpKPaZWVWqZNUj48Sq54iqw7TEMf0+bgpJdljMrmp3Z6Hye6gcOq+zuaqqhL29eLvasfX2ZZeOtrwd7bR1+shEvQTC
frpqtsx4FxavR5rtoucomKMjhxsufnYXQXYcvPQGcZmIlYhDkUCzjDUf/QuTevfpW3T+v6+NJBupTHl5mJypTsHa4fYUc9oHJuRaCeibes+PPROhzBt/pIxKMnhZ1QMFOGiKOUasD5FCi+B/k7M6fDjw6P0ElFi9KS66Il1sYOBH7wOvZNicxklljJKLJMoNJWiM1fSZzqeZs1P0AdbsbX+G1vrv7F2vIsm2ElpMH0rK4UWr30W7qwl9DgW4LNOQ9UMf7Zy1WAgVlpGrHTfUz6aUAh9ZzuGjnb0XZ3ou7rQeXvRBQLoAgGoq8UMNDz9JKrJjFpcBkWlqMWlqEWl6dfZuTCM4DPr+OnDLvtYURTQ6tOLyZF+HP70S2bj7Q2RSqr0eaMEPBECngh+dxhfZ3pJhlKk9rT4uLUq3Zp08OnWq/QoKfyRBOtafKxrGdjam2fSUmzVU2LTU2LVU2JNd3Qeym2usZq0c3AdKFicOVicORROHTh3VjwaSQefPYHH19mKr7MNf1cHyXi8f5TmjzPZHFhzXFizXdhyXP3fW3Nc0slZjCkJOEPkd3fw73tX9b/WmsyYXXmY8vIwObMHTEg5VIFe7xiWUEw0GjTpJ7nULKaq5f3rVVSWXXIJb6//F63R9vQSaaMt2oEn3o0/7sUf97LDv3nA+RQUXPocCo0FFBoKKJi0kOLJp7Ms245x5/s429/F6NtNTmATOYF0T5GUzkwobwF9BccSyl9IJHc2Kf3IOrKnLBailVVEK6v2lSkaRe/uQu/uxNTtRtPWjrGnGyJhlN27YPeuAedQjSbUwhLYG3oKilHzCyGvEIyD/2WvKsrQHqU+pLG7b7a3tcTpNEHFwNnfVVUl7I3R1Rygpy1ET3sfvW0h/J4waijdz6dbq+LeE3rcuhQenUoQFXckiTuSZGP3vukr9BqFYquOEqs+HX6sekptOmz68f/Hkd5o6h+deX+pVAol6qNm3TqSIS9dzS0EPV0EPJ3EQn39rT7dTbsHndNotQ8IPeHumejsudhz8zFkyAMY4siRgDNE9tx8Fp53CWaznrrGOnSWo/PxbTH+FBRyzbk4d6k4KWQWhUD6iacoMbrooUvpoUvpppMePEovHnqJKnHc8W7c8W42s28o/1v2TJJtybEwzT6fZeEYC8N+pkY8WBNhbO3vYmtPz3WkohA0lOI1VuMzVhMwlhMwTCKpMTO1/PRR3VrUaDSkUimqZi3Av30nifp64vW7931tbESJRlAa66Bx8OPRmrx8dGWl6ErL9iylRHYoNCk2sI7uw21KydjdFvnE1hIFsrOt6Kx95FdDfrUZMJOMpwj2JPC7Y/jccfzuGMGeBKlIupNXSFFxa1N4NHtCj0HFQ/qR9sZAnMbAwKe6HAbNnlYePQFXK1nhKJOyhj51xeGk0Whw5BdSOGXmoHFwYuEQfb3d9PV4CPa46ev17PneQywUJNoXINoXoLu5HoDtb7zQf1692YItJw9bbl46BOXm7fc6TwKQGEQCzhApGg1zzvw8zmwLTffePuKBqya6Id02U0BVUxiM2oyth/FixEAZ6UEH969bFZUgIdz09t/u8rCnk7MhRDAeIESI9foQ6/WAQ4Oi5lEdj7M4EmVxOMKcaIyiZBJ7rBl7rJmywOv95+/VZeH96zTC3iRBbS4RbS4xXQExbTaqMrSfCZNZTyQcZ9r8JazpSIG5HGaWw8xT0vskE+g9neg7W9F3tKS/ejrRuTvQhvtIubuIubuIrdvXkbUnfWqSZivJbBeJHBeJ7L1LXv/rlM3xiQMkfvxR+tEoPkSLklanwew48Gzy+9Po9bzw1AZi3iRRX4Jsb5IKfwI1BoQYcJvLo0vRbVRxa1R6Uin8sRT+WJTtvVFeaUm31mkUKHMYqHSmZ2evdBqochrJt+omzD/GDGYLBrOF7OLBdRiPhOnrTYedvaEnEQ7Q29FGJOAnHg7R29pIb+uBH84YFIBy8rBm52LJysaSnYvZkYVGKx95n
ybypy0GGNJts/0+zCTgHBkKCnas2LEyWS0dUO/LLr6Ufz74T3x46cWb/r/aS6/Si9fg5QWDl0ccPlKkcCWSzIlGmR2NMSsWY0osTn4ySXbCB81r+PgMVnGgU6enXWekS2fGo7PQrbPi19oJ6mxENDYMGDGgx5Yyoyga1nWtoy3eilFjxqSYMSomdIoetDriBSXEC0pg7n79q1QVTSiIztOJ3tOBzt2RDkKeDsw+DymvF224D224D0PbgT/cVJ2ehCObZFY2SYeTpCObRFY2yT3rYpMtaCIhUkbzoUeK/sQ/CIV4cvD0KftLAclD7ANgNmswZukwZumwk25hSj+VmSLqSxD1JrD4kuT6EqhhLYTTx8VId2R2a1P0mMFv0dASixFKqjT6YjT6YrzRGOy/jlWv6Q89k/ZMWFrqMJBvHVr/niNFbzLvGXtnX/gpra7AH4gSj0bSLT/dboI9boJ7vu59PZQAhKJgtmf1Bx5LVjYWZw5mhxOTIwuzPQvTnkWrk4/GTCB/iuJTIxOf7NrLrJgxY6aQPY8df+xzK6kmCREiqA8Q1Adx2wI8S5CgGkBNdpMb66RcHyTL20lBIkJxIkZJIoFRhdJEnNJEHAgOum46AGlp1+lo12np0Or41+OP0bH3tU5HUKNBp+gxac2YdRbM2vRi0lr2vDZjclow51qwzLJi003Dpj8Gm97B3LJytrxei90XxtDbg67Xg67Hg7bXnf6+14PW70VJxNH3dKHv6Tpg/TT9CSYBKZ2epD2LpC2LpM1B0mYnabGTtNhIWW0krXaSFitJix3VaBochlR4592aA15jb73b7CaCgcghw//pZ8wdfLiiYHBoMTi02Mv2hZ5EX4qoN0HUlyTqTWDyJSmOaSAG+EDFSFABtzaF1wK9JugkRUc8QV88xRZ3hC3uyIBrGbQKJXZ9/0Sl84IG8s16ypxmssyja/XZOw2FVqvBmJUz4n8Iafb0bdQbTTgLS3AWHvjx84MFoL0TlYa8vaipZHo6C7+3/xbYwRjM1j2hx5EOPTYHRqsdg9VKVo4Ts82O0WbHZLVhstkxWqz9ZR1k4mTITx0JOOJT49P0ZNfHaRUt9j3/DaCQnuFAD6d/+XSeumc1bUArKu+oMbSpboxJN+ZEN7ZEL46kF2fCT1YyRFYyjB4oTSQpTSQPeu2AotCh09GxJwh16LS0a9Nfm3Q6OnVakgf7MN2wp5haBVOeBUuBDatiw6bJwqYpx66Ziw0bzoiOrD7I8ifI9sWweoPofV60/l60vl5M4QApvx9NIo6m14O+13PIOlO1WpIWG0mLPR1+LDY8LetwdMdIWW2kLOl1qT3hKGWxoRrGfsRfRVHQ27TobVpspXvKpqokwymiviSlLhfN2zswe+LYA0nwk17QkkRHj0alxwQ+G/RqVTxqiq5oglhSpd4bo96bHoz0oa29/de0GrSUZpspdZopyzZTmr3nq9NMidOM2XCI25Z7pqHIcpjx+cMjDjgnLJkxpP0OFYDUVIpI0E/I17sv9Ph6CfX2EA54ifh9hAM+IgE/aipJLNxHLNyHv7NtyGXVGU3oDMb0YjSiM5j6v//iT3455POIsSMBRwgxiIKCUTGCtpiktpigId1+s/8whoqaxJwMYEl6Mad8ZClBDJEeJhdn423ZjjXWjTEZxK6q2ONxpsQHT38AkERDj95Cl95Ch85Em15Ps05Lo06lxajQlAqjKhBW+wgn++im88CFNu9ZCkCHHrsmC5vGgU1TzPSySvw1vTiiOrJCCs6gSrY/QZYvhs0fQRfqQxMKou0LoA0F0cSiKMkkuoAPXWDf49zeLR/xSb1rUgYjqtWGw2zdF34sNpJ7AlDKZCFltqCazIRdSfTtbel1JnO6xWiIj9ArioLOokVn0bLojAryJqens4hFUgQ8MfzuOAFPujOzrjdBXoj9pqzQkkKHT6MStCr0WRW8BhW/XkdzMExPNElfLMnOziA7Owe32gFkm7TkW3S4rOlJSvMse
vKsOvIs6dfzF0w94HHjxZFlJivLDCUHGfp6DzWVIhLqI+zzEvJ7Cfl86VafgJ9oX5BIMEC0L4i/u5tYOEQsHCIRTbeMJaKR/u/FxCABRwgxIqqiJaRzEtI5QQH3nn5Z5V9aybPPvQKALhnGGuvGEu/GGvNgjbmxxrqxxv9/e/ceJEV1N3z8290z0zPD7A2Wi2gCgpDgZhVcX4zxipUUPFGr1jL1JMZLEYhJ6sVQVkgkEEORUuENpBKMeYiKwZCIwRANj2+9uSjvmyJqqSEYRIxEWC4iRNh1L+zuzPTtnPePnp29AgvCDrvz+1CnTvfpnp7fHE/t/OzT092QX7d0wEivjZFeG1V9vE9g2rTHRtESG0FzrJyGWAlHogkORSK8F4F6w6FVtdCqWmhTLTg6i49Hk2qgSYVnat6p+0d4pqojCRrReXwLiyTDSDGMFKMYxgRSKkHKi1KatSjNGJSkNWVtAdWjR9P81kGsdDtme1uYFKXD2lAK03XAdbCaPuzjk3R36HHoeb5B2QlUIpGrw0RI2Ql0zEZFY+hYDB2NoaM2OhZDRWMcy+xHfdgEtk0kFqMiZlMx3MYYEwPLIiBO+zFNa7OirQXamhXHmhVmRlPRSu7BeAYQADE8NE7KJJMyaIvDMUvTjKbeCziaCae8mrIBTdmAfzX2viM3gPHf+6lIRBk+LEpFIkpFso86V5IneIL7mZrd6Xi6+SmJlpCsLCFZeUG35gs/cSFutvNzB76Pm0njtLfhZrN42UxYO5n8uigMSXCEKJAzdk3QOTzH71sJWhIX0JK4oM/thg5IeE2knKOk3KOU5OqUe5SUc5Sk14ilHEqzBynNHqSv3y8FsVLc1AW5Uk1m2Bia4mXUx+IcsSzqtUMs6bGv8TAtbhPNbiPNbhPNTiOZIE1AQCvHaA3ndUImYOdK91vdYI62SBhJ4maShDGMuDGShJEkoWMk/AhlRpRomyaRVcSzPnbWI55288XO1cODALO5Dau1HcMLz8CYTgbTObUvxKMbT75PR143qtsHMdFWBG1aaCMsvjbRWCjDBMNEG2Zum5GrTdLxBA2pchqTJTTGS2iMp/jQHkZDNMmHkSQNZhwPi8a0R2O677N2Xdkoys2ACjOgwlJURBSllqYkAh+vSJBQAWUxkzLbpDxmhQlRJAKmiWFFwLLAMsGywnWzY9mCWAwjZqOsUnQQnNb9yvry/qsnnu6OAlFskjEb7LIT7ivOHklwhCiQM3FNEAze64IgdxYoVkk6VslRLu61/baZ1/C3bf+XePrfJNpzJf1vEu0fkEj/m5jTjOUeI9H4TxKNnfcG+niXYwSWjVl2PvVtmoxVmisVpK1xtFnD+NCK0WBFaDcc2o0saTL5Om1kac/VaTI4hocioF230h609oo3/FBAMlf6wcAgqpNEdYSotohoE0uBpQwsBZGAXnUk0FiBJhJoSu0ElhtgeWExvQDLDYi4PqavMH2F5SssBaYKj2FpMJXGUh6W8jrb8/vo3D492yGShfPT8HHV93a0wTF7GA3xMpriJTTZJV3q0vx6o11CJhrHweSIMjmiouHT37ueFGqEnl9TkcCl1G2i1G2nzE1T4rZT6rZT4qZJeRlSXoZhXjZXZ0i5uTY/i2WZEIt1K0bM7rEeg55tdhzicUgkaN0/DrVnH9g2hm2HN6i042Edi50zP8kXkuAIMSRMuuHyQofQzXkXjjr5Tv0Rsanb3QqkgElhiQLlYYmoLCn/w7B4DaT8htx6A8P8ZmzVjhU40LiXE0WkMMiaKTJWKY4V/vw9a6XImqVkzbH5tmtv/588/9e3yZImo9JkdYaMbifbZdmPOLQ6rTg6i6ddPO3iaiescfG0g6sddO7KW43GNVxcwz3NTjpOopVnAAN752NTOZi6HlM1YGoDUxmYOkzYTG0QVTBWGxgqivJTqKCz+GoYSiUJVAJFCk8l8FUCVydRRPGtCI2JMhoTp35mxA6yJP0sKS8dJkFuhpJcSbkZUu1hc
pTwW0n6WRK+Q8J3SPoOCS9cPqyPf0E9hhEmR3ZnMeI2fG7GR+hNcbokwRFiCHjjD+s/8jHGzZl3BiIJjbLP3I31Lr3yM/3e1wOacgXA8LNEM0eZVAF//9/rSATHuhd1jETQiokiqVpJqtbwIMfzk4f5ohnFj1cSJEbgxyvxE5X48eEEdgWBPQ4Vr8CPluLb5QR2OYFdBj2eB6a1ZsLHKnjqN0/j4+X++Xi4KAICFCq/FOSWO1o6tobrVZdczNG2I/jKJ9ABgQ7wlY+vfZRWKK3CdhV0LuveyxEriue7BCrIt3ccq+t6vqgwjr4oM3wQ6cnzqiwnS9AMOqZ8QKsoOkiig2FoP1d3rAcJtEqggzh0WdYqASr8yb1jxXGsOE12+ckCOy4TnwguERyi2iGmHGJBlpjKYgcOceUQzyVGCd8lHrj8+LTfTXwUgzrBcRyHH/zgB7zwwgvE43HmzJnDnDlzCh2WEEXvX1uePyPHuXjivDNyrIvmzOO95PY+txlaYav2fNITV23EgzbsXN11PaYdTOURS/8b0r0fJHk8QTRFECvrTHpiZdgjx/LppsN4ho1nxvHMOH6XZc8Yhmfa+GacwIiisPq8SeFtl3yRY63H+njXU1NaUsrmpzf3/wUmKK3Q6C7JWPjvqluu4rnnNhGNW2SzTmc6phUq0AQ+BD4oXxMEGqU0KoBAKQKVW9ca0zRxXR9lBGhDofIlQJk+vuERmD6B2U5gthBEfALTwzfDOjBy60aApy08IgQ6gq+jBDqKDhKg4t2So7DNRisbrWL5ZXR4CwCVS29ckp0nx06SyEmCUxiDOsFZsWIFO3fuZN26dRw+fJiFCxcyduxYZs2aVejQhBCDhDZMslYJWauEpl6/a+ruljvn8PxTTxLPnflJ+C3Eg1biqhU7aMdW7cRJE/XasFU7tgp/m215bVheG7H2Q50HOwC9b/V3Yr4RJTCiBEYEZUQIjCiRx39GxA1QVozAjKGtaFib0dxFwl0vGO68cBjDRNPZVj7qPC5qPIDGCF/TsQ0TjZHb3wCM3Os6l8HIrYf7jNqnuOl8E8M0UDoBGGB0Hje/bJhoMxLGmiuqy/LkiePItmUIVATPt/A9jecofFfhuQG+o/BcFdZej3U3IPAUflahApOG+kZQoAMDrQxUECZovul1T4a6JUftBGYzvhmeZ3NNjWNqXEPhAq4BngEeBh4GPmauWPjaQmmTYICnB0WnQZvgpNNpNm7cyJo1a6iqqqKqqordu3ezfv16SXCEEGdHNMFF197cq9nNlVbANA2Uyt3ZTgVY7jEst4WI05Krm7HcFsYkNU1HD2F67ZheO5bfUae7tKUxulzzEdEeEd1jDu3IkZ4/9Do9/wqvcjojnoPxZ+hQXSf3tGGBFQUrTODC5SjajIEVCacCrSjajEA8CskomBHMaIIjqSa0aaGMCNqIoEwr/MUYJlqHSY/WgDZQygANWpFrD7dpZVBeMpz2tItSOpcoaZQCpQyUMgmC8PVhbRL4ctFxoQzaBGfXrl34vs+0adPybTU1NTz66KMopTD7ecMsIYQ4FSecMuv3c9rKuGXOPP7f2v/qbOprqkPr8HyA9jF1gKW9cJkg1+Zx7cwbefWPz2HhY+pw3479DVSuaAytuywrwmfLh8sGmolTqtjzr3e7tOWKVrk7EeReH2YB4X6549LtPTSjx4yk4fBBLNNAqaDLfp2vI9dmEmDqXKFn3f0aH0MH4AfgZ0/57gjnneL+x9V+ku25k1Tdv13/15l6d3EKBm2CU19fT0VFBbEut0avrKzEcRyam5sZPvzkT/OF8JYJup+3Ee+Y/o7Gov1+zYlEYtGT7zSAx+nvsQwDIpEo0djx+26wfrZTPU5/+uJsx3Mmj3W6x+naD2cynjN5rIE4zqmOh/7FFEMT3oavr9/v6AkzqC87wfOx+unC//gab7Q++5GPA/CF/7yVV9Y/jm1HcRzvtP9e3vzlufyf9Y/1ToC0wuiVGPmYuYTJQ
OX3NQiY+j8+wz/+/vewjQBDKywd5JPAjmzUQHf7D2d0bc8tT5x4IfvffadbW8frjC5JYEcyaaIYe7odKT4SQ+sz8VU98DZt2sTDDz/MX/7yl3zbwYMH+exnP8uWLVsYM2ZMAaMTQgghRCEN2nkc27Zx3e73jehYj8fjhQhJCCGEEOeIQZvgjB49mqamJnzfz7fV19cTj8cpLS0tYGRCCCGEKLRBm+BMmTKFSCTC9u3b823btm2jurpaLjAWQgghitygzQQSiQS1tbUsXbqUHTt2sHnzZtauXctdd91V6NCEEEIIUWCD9iJjgEwmw9KlS3nhhRdIpVLMnTuX2bNnFzosIYQQQhTYoE5whBBCCCH6MminqIQQQgghjkcSHCGEEEIMOZLgCCGEEGLIkQSnnxzHYfHixVx++eVcffXVrF27ttAhFcSLL77IJz7xiW5l/vz5hQ5rwLiuy0033cTrr7+ebzt48CCzZ89m6tSpfP7zn+fll18uYIQDp6++ePDBB3uNj6eeeqqAUZ49R44cYf78+UyfPp1rrrmG5cuX4zgOUHxj4kR9UUxj4sCBA8ydO5dp06Zx/fXX88QTT+S3FduYOBcM2mdRDbQVK1awc+dO1q1bx+HDh1m4cCFjx44tuieX79mzhxkzZvDAAw/k22zbLmBEA8dxHBYsWMDu3Z3P/dFaM2/ePCZPnsyzzz7L5s2bueeee/jDH/7A2LFD9wk0ffUFQF1dHQsWLOCWW27Jt6VSqYEO76zTWjN//nxKS0tZv349LS0tLF68GNM0ue+++4pqTJyoLxYuXFg0Y0Ipxde+9jWqq6v5/e9/z4EDB/jWt77F6NGjuemmm4pqTJwrJMHph3Q6zcaNG1mzZg1VVVVUVVWxe/du1q9fX3QJTl1dHZMnT2bkyJGFDmVA7dmzhwULFtDzR4evvfYaBw8eZMOGDSSTSSZOnMirr77Ks88+yze/+c0CRXt2Ha8vIBwfc+fOHfLjY+/evWzfvp1XXnmFyspKAObPn88Pf/hDrr322qIaEyfqi44EpxjGRENDA1OmTGHp0qWkUinGjx/PlVdeybZt26isrCyqMXGukCmqfti1axe+7zNt2rR8W01NDW+++SZKqQJGNvDq6uoYP358ocMYcH/729+44ooreOaZZ7q1v/nmm1x88cUkk8l8W01NTbc7bA81x+uLtrY2jhw5UhTjY+TIkTzxxBP5L/QObW1tRTcmTtQXxTQmRo0axapVq0ilUmit2bZtG1u3bmX69OlFNybOFXIGpx/q6+upqKggFovl2yorK3Ech+bmZoYPH17A6AaO1pp9+/bx8ssv89hjjxEEAbNmzWL+/Pnd+mYo+vKXv9xne319PaNGjerWNmLECD744IOBCKsgjtcXdXV1GIbBo48+yl//+lfKy8v5yle+0m1qYqgoLS3lmmuuya8rpXjqqaf49Kc/XXRj4kR9UUxjoqsbbriBw4cPM2PGDGbOnMmyZcuKakycKyTB6YdMJtPrC7xjvecTzYeyw4cP5/ti1apVvP/++zz44INks1nuv//+QodXEMcbG8U0Ljrs3bsXwzCYMGECd9xxB1u3buX73/8+qVSKz33uc4UO76xauXIl//znP/nd737HL3/5y6IeE1374u233y7KMfHTn/6UhoYGli5dyvLly+XvRIFIgtMPtm33Gogd6/F4vBAhFcT555/P66+/TllZGYZhMGXKFJRSfOc732HRokVYllXoEAecbds0Nzd3a3Ndt6jGRYfa2lpmzJhBeXk5AJ/85CfZv38/v/nNb4b0l9nKlStZt24dP/nJT5g8eXJRj4mefTFp0qSiHBPV1dVAeDH+t7/9bW699VYymUy3fYplTBSSXIPTD6NHj6apqQnf9/Nt9fX1xONxSktLCxjZwCsvL8cwjPz6xIkTcRyHlpaWAkZVOKNHj6ahoaFbW0NDQ6/T0cXAMIz8F1mHCRMmcOTIkcIENAAeeOABnnzySVauXMnMmTOB4
h0TffVFMY2JhoYGNm/e3K3toosuwvM8Ro4cWZRjotAkwemHKVOmEIlEul0Qtm3bNqqrqzHN4unCl156iSuuuKLb/4m88847lJeXF811SD1deumlvP3222Sz2Xzbtm3buPTSSwsYVWE8/PDDvR52u2vXLiZMmFCYgM6yn/3sZ2zYsIEf//jH3Hjjjfn2YhwTx+uLYhoT77//Pvfcc0+35G3nzp0MHz6cmpqaohsT54Li+Xb+CBKJBLW1tSxdupQdO3awefNm1q5dy1133VXo0AbUtGnTsG2b+++/n71797JlyxZWrFjBV7/61UKHVjDTp0/nvPPOY9GiRezevZvHH3+cHTt28IUvfKHQoQ24GTNmsHXrVn7xi1/w3nvv8fTTT7Np0ybmzJlT6NDOuLq6OlavXs3dd99NTU0N9fX1+VJsY+JEfVFMY6K6upqqqioWL17Mnj172LJlCytXruQb3/hG0Y2Jc4YW/ZJOp/V9992np06dqq+++mr95JNPFjqkgnj33Xf17Nmz9dSpU/VVV12lH3nkEa2UKnRYA2ry5Mn6tddey6/v379f33777fpTn/qUvvHGG/Urr7xSwOgGVs++ePHFF/XNN9+sq6ur9axZs/Sf//znAkZ39jz22GN68uTJfRati2tMnKwvimVMaK31Bx98oOfNm6cvu+wyfdVVV+mf//zn+b+PxTQmzhWG1n3crUsIIYQQYhCTKSohhBBCDDmS4AghhBBiyJEERwghhBBDjiQ4QgghhBhyJMERQgghxJAjCY4QQgghhhxJcIQQQggx5EiCI4QQQoghRxIcIUTeO++8wxtvvHFar73hhht47rnnznBEQghxeiTBEULkzZs3j/379xc6DCGE+MgkwRFCCCHEkCMJjhACgDvvvJNDhw6xaNEivvvd7/Luu+9y5513cskllzBz5kzWr1/fbf8NGzZw/fXXc9lll7F69eoCRS2EEH2TBEcIAcAjjzzCmDFjWLx4Md/73ve4++67qamp4fnnn2fhwoWsXr2aTZs2AfDSSy/x0EMPce+99/LMM8/w1ltvcejQocJ+ACGE6CJS6ACEEOeG8vJyLMuipKSEP/3pT4wYMYJ7770XgPHjx3Po0CF+9atfUVtby8aNG7n55pupra0FYNmyZVx33XWFC14IIXqQBEcI0cvevXvZtWsX06ZNy7cFQYBlWQDU1dXxpS99Kb+toqKCj33sYwMepxBCHI8kOEKIXnzf58orr2TJkiXH3Udr3W09Go2e7bCEEKLf5BocIUQvF154Ifv27eOCCy5g3LhxjBs3ju3bt/PrX/8agEmTJvHWW2/l929ra+PAgQOFClcIIXqRBEcIkZdMJtm7dy/XXXcd2WyWJUuWUFdXx5YtW3jooYcYMWIEAHfccQd//OMf+e1vf0tdXR1Lliwhm80WOHohhOgkU1RCiLzbbruNH/3oR+zfv581a9awbNkyamtrKS8v5/bbb+frX/86AJdffjnLly9n1apVNDY2cuuttzJlypQCRy+EEJ0M3XMiXQghhBBikJMpKiGEEEIMOZLgCCGEEGLIkQRHCCGEEEOOJDhCCCGEGHIkwRFCCCHEkCMJjhBCCCGGHElwhBBCCDHkSIIjhBBCiCFHEhwhhBBCDDmS4AghhBBiyJEERwghhBBDzv8HGa9kOaE1FRsAAAAASUVORK5CYII=" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "g = sns.displot(\n", + " df_synt_scores,\n", + " x=\"ted\",\n", + " hue=\"lang_id\",\n", + " kde=True,\n", + " # log_scale=(False, 2),\n", + " multiple=\"layer\",\n", + " alpha=0.15,\n", + " # 
facet_kws={'hist_kws':dict(alpha=0.1)}\n", + ")\n", + "plt.xlim(-1, 30)\n", + "plt.ylim(0, None)\n", + "plt.show()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-07-19T10:31:42.618669Z", + "start_time": "2023-07-19T10:31:39.680391Z" + } + } + }, + { + "cell_type": "code", + "execution_count": 372, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "f, axes = plt.subplots(2, 3, figsize=(9, 6), sharex=True, sharey=True)\n", + "\n", + "for ax, lang in zip(axes.flat, df['lang_id'].unique()):\n", + " #\n", + " # # Create a cubehelix colormap to use with kdeplot\n", + " # cmap = sns.cubehelix_palette(start=s, light=1, as_cmap=True)\n", + "\n", + " sns.histplot(\n", + " df_synt_scores[df_synt_scores['lang_id'] == lang],\n", + " x=\"ted\",\n", + " kde=True,\n", + " # log_scale=(False, 2),\n", + " multiple=\"layer\",\n", + " # alpha=0.25,\n", + " # facet_kws={'hist_kws':dict(alpha=0.1)}\n", + " stat='count',\n", + " ax=ax,\n", + " )\n", + " ax.set_title(lang)\n", + " # ax.set_axis_off()\n", + "\n", + "ax.set(xlim=(-1, 30))\n", + "f.suptitle(\"Distribution of Tree Edit Distance (TED)\", fontsize=12)\n", + "plt.show()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-07-19T10:06:31.926535Z", + "start_time": "2023-07-19T10:06:27.991091Z" + } + } + }, + { + "cell_type": "code", + "execution_count": 309, + "outputs": [ + { + "data": { + "text/plain": " unit_id lang_id ted mt_tbd_bad_count\n2864 flores101-main-nld-100-pe2-1 nld 11 4\n433 flores101-main-ukr-5-pe2-2 ukr 2 6\n4835 flores101-main-vie-93-pe1-5 vie 7 2\n4776 flores101-main-vie-69-pe1-4 vie 18 2\n2975 flores101-main-nld-39-pe1-5 nld 0 0", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
unit_idlang_idtedmt_tbd_bad_count
2864flores101-main-nld-100-pe2-1nld114
433flores101-main-ukr-5-pe2-2ukr26
4835flores101-main-vie-93-pe1-5vie72
4776flores101-main-vie-69-pe1-4vie182
2975flores101-main-nld-39-pe1-5nld00
\n
" + }, + "execution_count": 309, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_synt_scores['mt_tbd_bad_count'] = df['mt_tbd_qe'].apply(eval).apply(lambda x: sum(len(i - {'OK', 'BAD-DEL-L', 'BAD-DEL-R', 'BAD-SHF'}) for i in x)).values\n", + "df_synt_scores.sample(5)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-07-19T09:36:35.620924Z", + "start_time": "2023-07-19T09:36:30.127487Z" + } + } + }, + { + "cell_type": "code", + "execution_count": 327, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pearson correlation\n" + ] + }, + { + "data": { + "text/plain": "lang_id \nara ted 0.556922\nita ted 0.434015\nnld ted 0.669098\ntur ted 0.689409\nukr ted 0.710757\nvie ted 0.642947\nName: mt_tbd_bad_count, dtype: float64" + }, + "execution_count": 327, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print('Pearson correlation')\n", + "df_synt_scores.groupby('lang_id')[['ted', 'mt_tbd_bad_count']].corr(method='pearson').loc[(slice(None),'ted'), 'mt_tbd_bad_count']" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-07-19T09:43:18.169352Z", + "start_time": "2023-07-19T09:43:18.104678Z" + } + } + }, + { + "cell_type": "code", + "execution_count": 355, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: BAD count except for BAD-DEL and BAD-SHF\n" + ] + }, + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print('Note: BAD count except for BAD-DEL and BAD-SHF')\n", + "sns.jointplot(df_synt_scores, x=\"ted\", y=\"mt_tbd_bad_count\", hue=\"lang_id\", kind=\"kde\", fill=Fl, marginal_kws={'hist_kws':dict(alpha=0.1)})\n", + "plt.xlim(-5, 60)\n", + "plt.ylim(-5, 30)\n", + "plt.show()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-07-19T09:59:48.479409Z", + "start_time": "2023-07-19T09:59:42.344089Z" + } + } + }, + { + "cell_type": "code", + "execution_count": 374, + "outputs": [ + { + "data": { + "text/plain": "Text(0.5, 0.98, 'Correlation of Tree Edit Distance (TED) vs #BAD-X tags')" + }, + "execution_count": 374, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "f, axes = plt.subplots(2, 3, figsize=(9, 6), sharex=True, sharey=True)\n", + "\n", + "for ax, lang in zip(axes.flat, df['lang_id'].unique()):\n", + " #\n", + " # # Create a cubehelix colormap to use with kdeplot\n", + " # cmap = sns.cubehelix_palette(start=s, light=1, as_cmap=True)\n", + "\n", + " sns.kdeplot(\n", + " df_synt_scores[df_synt_scores['lang_id'] == lang],\n", + " x=\"ted\",\n", + " y=\"mt_tbd_bad_count\",\n", + " # cmap=None,\n", + " fill=True,\n", + " # clip=(-5, 5),\n", + " # cut=10,\n", + " # thresh=0,\n", + " # levels=15,\n", + " ax=ax,\n", + " )\n", + " ax.set_title(lang)\n", + " # ax.set_axis_off()\n", + "\n", + "ax.set(xlim=(-7, 40), ylim=(-5, 25))\n", + "f.suptitle(\"Correlation of Tree Edit Distance (TED) vs #BAD-X tags\", fontsize=12)\n", + "plt.show()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-07-19T10:07:10.961389Z", + "start_time": "2023-07-19T10:07:04.374076Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "---" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 35, + "outputs": [], + "source": [ + "import matplotlib.ticker as ticker\n", + "\n", + "\n", + "def plot_summary_df(summary_df, title=''):\n", + " try:\n", + " summary_df = summary_df.drop(['TOTAL', 'AVG'], axis=0)\n", + " except KeyError:\n", + " pass\n", + "\n", + " sns.set_theme(style=\"whitegrid\")\n", + " sns.set(font_scale=1.5)\n", + "\n", + " # Make the PairGrid\n", + " g = sns.PairGrid(\n", + " # summary_df.reset_index().sort_values(\"total (same pos)\", ascending=False),\n", + " summary_df.reset_index(),\n", + " x_vars=summary_df.columns,\n", + " y_vars=[\"lang_id\"],\n", + " height=10,\n", + " aspect=.3,\n", + " )\n", + "\n", + " # Draw a dot plot using the stripplot function\n", + " g.map(\n", + " sns.stripplot,\n", + " size=15,\n", + " orient=\"h\",\n", + " 
jitter=False,\n", + " palette=\"flare_r\",\n", + " linewidth=2,\n", + " edgecolor=\"w\",\n", + " )\n", + "\n", + " if title:\n", + " g.fig.subplots_adjust(top=0.9)\n", + " g.fig.suptitle(title)\n", + "\n", + " # Calculate the average for each column and draw a horizontal line\n", + " for ax, col in zip(g.axes.flat, summary_df.columns):\n", + " avg = summary_df[col].mean()\n", + " ax.axvline(avg, color='r', linestyle='--')\n", + "\n", + " step = 5000 if summary_df[col].max() > 10000 else 2500 if summary_df[col].max() > 5000 else 1000 if summary_df[col].max() > 2000 else 500 if summary_df[col].max() > 800 else 100 if summary_df[col].max() > 500 else 50\n", + " ax.set_xticks(np.arange(0, summary_df[col].max(), step=step))\n", + " ax.xaxis.set_major_formatter(ticker.EngFormatter())\n", + "\n", + "\n", + " # setup axis limits\n", + " g.set(xlim=(0, None), xlabel=\"Count\", ylabel=\"\")\n", + " for ax, title in zip(g.axes.flat, list(summary_df.columns)):\n", + " ax.set(title=title)\n", + " ax.xaxis.grid(True)\n", + " ax.yaxis.grid(True)\n", + "\n", + " sns.despine(left=True, bottom=True)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-07-18T17:00:42.468181Z", + "start_time": "2023-07-18T17:00:42.176945Z" + } + } + }, + { + "cell_type": "code", + "execution_count": 190, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TOTAL:\t 14809\n", + "SAME POS:\t 10820\n", + "DIFF POS:\t 3989\n" + ] + }, + { + "data": { + "text/plain": " total same_pos diff_pos diff_deprel\nlang_id \nara 2055 1230 825 1027\nita 2368 1702 666 861\nnld 1728 1301 427 600\ntur 2217 1685 532 1079\nukr 4295 3425 870 1641\nvie 2146 1477 669 1217\nTOTAL 14809 10820 3989 6425\nAVG 2468 1803 664 1070", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
totalsame_posdiff_posdiff_deprel
lang_id
ara205512308251027
ita23681702666861
nld17281301427600
tur221716855321079
ukr429534258701641
vie214614776691217
TOTAL148091082039896425
AVG246818036641070
\n
" + }, + "execution_count": 190, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print('TOTAL:\\t', len(df_stats))\n", + "print('SAME POS:\\t', len(df_stats[df_stats['same_pos']]))\n", + "print('DIFF POS:\\t', len(df_stats[~df_stats['same_pos']]))\n", + "\n", + "tmp = pd.DataFrame([\n", + " df_stats.groupby(['lang_id']).size(),\n", + " df_stats[df_stats['same_pos']].groupby(['lang_id']).size(),\n", + " df_stats[~df_stats['same_pos']].groupby(['lang_id']).size(),\n", + " df_stats[~df_stats['same_deprel']].groupby(['lang_id']).size(),\n", + "], index=[\n", + " 'total',\n", + " 'same_pos',\n", + " 'diff_pos',\n", + " 'diff_deprel',\n", + "]).fillna(0).astype('int').T\n", + "tmp.loc['TOTAL'] = tmp.sum(numeric_only=True)\n", + "tmp.loc['AVG'] = (tmp.loc['TOTAL'] / len(set(df_stats['lang_id']) - {'TOTAL'})).astype('int')\n", + "\n", + "# print(tmp)\n", + "tmp" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-07-13T11:05:51.373876Z", + "start_time": "2023-07-13T11:05:51.325488Z" + } + } + }, + { + "cell_type": "code", + "execution_count": 193, + "outputs": [ + { + "data": { + "text/plain": " total same_pos diff_pos diff_deprel\nlang_id \nara 2055 1230 825 1027\nita 2368 1702 666 861\nnld 1728 1301 427 600\ntur 2217 1685 532 1079\nukr 4295 3425 870 1641\nvie 2146 1477 669 1217\nTOTAL 14809 10820 3989 6425\nAVG 2468 1803 664 1070", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
totalsame_posdiff_posdiff_deprel
lang_id
ara205512308251027
ita23681702666861
nld17281301427600
tur221716855321079
ukr429534258701641
vie214614776691217
TOTAL148091082039896425
AVG246818036641070
\n
" + }, + "execution_count": 193, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tmp" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-07-13T11:06:01.710994Z", + "start_time": "2023-07-13T11:06:01.681673Z" + } + } + }, + { + "cell_type": "code", + "execution_count": 191, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_summary_df(tmp, title='BAD-SUB types')" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-07-13T11:05:54.004076Z", + "start_time": "2023-07-13T11:05:52.391641Z" + } + } + }, + { + "cell_type": "code", + "execution_count": 182, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TOTAL:\t 10820\n" + ] + }, + { + "data": { + "text/plain": " total (same pos) same_lemma same_morf same_lemma diff_morf \\\nlang_id \nara 1230 93 374 \nita 1702 100 500 \nnld 1301 153 100 \ntur 1685 115 623 \nukr 3425 128 1164 \nvie 1477 0 0 \nTOTAL 10820 589 2761 \nAVG 1803 98 460 \n\n diff_lemma same_morf diff_lemma diff_morf same_word (diff case) \nlang_id \nara 498 265 0 \nita 705 397 68 \nnld 867 181 114 \ntur 604 343 79 \nukr 1067 1066 124 \nvie 1468 9 160 \nTOTAL 5209 2261 545 \nAVG 868 376 90 ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
total (same pos)same_lemma same_morfsame_lemma diff_morfdiff_lemma same_morfdiff_lemma diff_morfsame_word (diff case)
lang_id
ara1230933744982650
ita170210050070539768
nld1301153100867181114
tur168511562360434379
ukr3425128116410671066124
vie14770014689160
TOTAL10820589276152092261545
AVG18039846086837690
\n
" + }, + "execution_count": 182, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "_df_stats = df_stats[df_stats['same_pos']]\n", + "print('TOTAL:\\t', len(_df_stats))\n", + "\n", + "tmp = pd.DataFrame([\n", + " _df_stats.groupby(['lang_id']).size(),\n", + " _df_stats[_df_stats['same_lemma'] & _df_stats['same_morf']].groupby(['lang_id']).size(),\n", + " _df_stats[_df_stats['same_lemma'] & ~_df_stats['same_morf']].groupby(['lang_id']).size(),\n", + " _df_stats[~_df_stats['same_lemma'] & _df_stats['same_morf']].groupby(['lang_id']).size(),\n", + " _df_stats[~_df_stats['same_lemma'] & ~_df_stats['same_morf']].groupby(['lang_id']).size(),\n", + " _df_stats[_df_stats['same_word']].groupby(['lang_id']).size(),\n", + "], index=[\n", + " 'total (same pos)',\n", + " 'same_lemma same_morf',\n", + " 'same_lemma diff_morf',\n", + " 'diff_lemma same_morf',\n", + " 'diff_lemma diff_morf',\n", + " 'same_word (diff case)',\n", + "]).fillna(0).astype('int').T\n", + "tmp.loc['TOTAL'] = tmp.sum(numeric_only=True)\n", + "tmp.loc['AVG'] = (tmp.loc['TOTAL'] / len(set(df_stats['lang_id']) - {'TOTAL'})).astype('int')\n", + "\n", + "# print(tmp)\n", + "tmp" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-07-13T10:37:02.689878Z", + "start_time": "2023-07-13T10:37:02.633850Z" + } + } + }, + { + "cell_type": "code", + "execution_count": 183, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_summary_df(tmp, title='BAD-SUB same_pos types')" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-07-13T10:37:11.466933Z", + "start_time": "2023-07-13T10:37:08.060140Z" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "---" + ], + "metadata": { + "collapsed": false + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/qe_visualize.ipynb b/notebooks/qe_visualize.ipynb new file mode 100644 index 0000000..73fa658 --- /dev/null +++ b/notebooks/qe_visualize.ipynb @@ -0,0 +1,490 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "## Imports" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-27T12:24:02.230745Z", + "start_time": "2023-08-27T12:24:02.185332Z" + } + }, + "outputs": [], + "source": [ + "from typing import List, Set, Tuple, Union, Optional\n", + "from pathlib import Path\n", + "import ast\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import networkx as nx\n", + "import pandas as pd\n", + "\n", + "from divemt.qe_taggers import NameTBDGeneralTags, NameTBDTagger\n", + "from divemt.qe_taggers.custom_simalign import SentenceAligner as CustomSentenceAligner" + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Load data" + ], + 
"metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-27T12:24:03.212569Z", + "start_time": "2023-08-27T12:24:03.153402Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": "True" + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DATASET_FOLDER = Path() / '..' / 'data' / 'processed'\n", + "DATASET_FOLDER.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "def get_sample(df, idx):\n", + " sample = df.iloc[idx]\n", + " lang_id = sample['lang_id']\n", + "\n", + " src_tokens = ast.literal_eval(sample['src_tokens'])\n", + " mt_tokens = ast.literal_eval(sample['mt_tokens'])\n", + " tgt_tokens = ast.literal_eval(sample['tgt_tokens'])\n", + "\n", + " src_tbd_qe = ast.literal_eval(sample[f'src_tbd_qe'])\n", + " mt_tbd_qe = ast.literal_eval(sample[f'mt_tbd_qe'])\n", + "\n", + " return lang_id, src_tokens, mt_tokens, tgt_tokens, src_tbd_qe, mt_tbd_qe" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# read pandas dataframe\n", + "df_it = pd.read_csv(DATASET_FOLDER / 'ita' / 't1_warmup_texts.tsv', sep='\\t')\n", + "df_it = df_it[pd.notna(df_it['mt_tokens'])]\n", + "len(df_it)\n", + "df_it" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Aux functions for visualization" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-27T12:24:05.676319Z", + "start_time": "2023-08-27T12:24:05.653381Z" + } + }, + "outputs": [], + "source": [ + "def custom_bipartite_layout(\n", + " G, top, bottom, aspect_ratio=4 / 3,\n", + "):\n", + " height = 1\n", + " width = aspect_ratio * height\n", + " offset = (width / 2, height / 2)\n", + "\n", 
+ " nodes = list(top) + bottom\n", + "\n", + " left_xs = np.repeat(0, len(top))\n", + " right_xs = np.repeat(width, len(bottom))\n", + " left_ys = np.linspace(0, height, len(top))\n", + " right_ys = np.linspace(0, height, len(bottom))\n", + "\n", + " top_pos = np.column_stack([left_xs, left_ys]) - offset\n", + " bottom_pos = np.column_stack([right_xs, right_ys]) - offset\n", + "\n", + " pos = np.concatenate([top_pos, bottom_pos])\n", + " pos = pos[:, ::-1] # swap x and y coords for horizontal\n", + " pos = dict(zip(nodes, pos))\n", + " return pos\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-27T12:24:06.775669Z", + "start_time": "2023-08-27T12:24:06.749448Z" + } + }, + "outputs": [], + "source": [ + "def draw_aligned_qe(\n", + " top_tokens: List[str],\n", + " bottom_tokens: List[str],\n", + " top_qe_tags: Optional[List[Union[str, Set[str]]]],\n", + " bottom_qe_tags: Optional[List[Union[str, Set[str]]]],\n", + " top_bottom_alignments: List[Union[Tuple[int, int], Tuple[int, int, float]]],\n", + " *,\n", + " title: str = None,\n", + "):\n", + " # create graph\n", + " G = nx.Graph()\n", + " top, bottom = [f'top_{i}' for i in range(len(top_tokens))], [f'bottom_{i}' for i in range(len(bottom_tokens))]\n", + " G.add_nodes_from(top, bipartite=0)\n", + " G.add_nodes_from(bottom, bipartite=1)\n", + " G.add_edges_from([(f'top_{alignment[0]}', f'bottom_{alignment[1]}') for alignment in top_bottom_alignments])\n", + "\n", + " # set words as nore names\n", + " custom_node_names = {}\n", + " custom_node_names.update({f'top_{i}': tok for i, tok in enumerate(top_tokens)})\n", + " custom_node_names.update({f'bottom_{i}': tok for i, tok in enumerate(bottom_tokens)})\n", + "\n", + " # set qe attributes as labels\n", + " custom_node_attrs = {}\n", + " if top_qe_tags:\n", + " custom_node_attrs.update({f'top_{i}': str(qe) for i, qe in enumerate(top_qe_tags)})\n", + " if bottom_qe_tags:\n", + " 
custom_node_attrs.update({f'bottom_{i}': str(qe) for i, qe in enumerate(bottom_qe_tags)})\n", + "\n", + " # connection weighs if any\n", + " if top_bottom_alignments and len(top_bottom_alignments[0]) == 3:\n", + " custom_edge_weights = {(f'top_{i}', f'bottom_{j}'): round(w, 2) for i, j, w in top_bottom_alignments}\n", + " else:\n", + " custom_edge_weights = None\n", + "\n", + " # get nodes and attributes positions\n", + " pos = custom_bipartite_layout(G, bottom, top)\n", + " pos_attrs = {node: (x, y+0.2) if node in top else (x, y-0.2) for node, (x, y) in pos.items()}\n", + "\n", + " # draw graph\n", + " fig, ax = plt.subplots()\n", + " width = max(3*max(len(top_tokens), len(bottom_tokens)), 12)\n", + " ax.margins(0.1, 0.2)\n", + " height = 6\n", + " fig.set_size_inches(width, height)\n", + " nx.draw_networkx(G, pos=pos, width=2, ax=ax, labels=custom_node_names, node_size=0, bbox=dict(facecolor='white', edgecolor='skyblue', boxstyle='round', pad=0.2), edgelist=custom_edge_weights.keys(), edge_color=custom_edge_weights.values(), edge_cmap=plt.cm.Blues, edge_vmin=0.5, edge_vmax=1)\n", + " nx.draw_networkx_labels(G, pos_attrs, labels=custom_node_attrs, font_size=9)\n", + " if custom_edge_weights:\n", + " nx.draw_networkx_edge_labels(G, pos, edge_labels=custom_edge_weights, font_size=8, label_pos=0.5, ax=ax)\n", + " if title:\n", + " ax.set_title(title)\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-27T12:36:38.042486Z", + "start_time": "2023-08-27T12:36:37.009381Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "alignments = [(1, 0, 0.9), (2, 2, 0.75), (2, 3, 0.6), (3, 4, 0.75), (4, 4, 0.9), (5, 5, 0.95)]\n", + "top_tokens = ['There', 'some', 'translated', 'the', 'words', 'here']\n", + "top_qe = [{'BAD-INS'}, {'OK', 'BAD-DEL-R'}, {'BAD-CON', 'BAD-DEL-L'}, {'BAD-EXP'}, {'BAD-EXP'}, {'OK'}]\n", + "bottom_tokens = ['Some', 'cool', 'post', 'edit', 'words', 'here']\n", + "# bottom_qe = [{'OK'}, {'BAD-INS'}, {'BAD-DEL-L', 'OK'}, {'BAD-DEL-R', 'OK'}]\n", + "bottom_qe = []\n", + "\n", + "draw_aligned_qe(top_tokens, bottom_tokens, top_qe, bottom_qe, alignments, title='Translated and Post-edited Sentences alignments with QE tags')" + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Load align models" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2023-06-12T11:12:48.109430Z", + "start_time": "2023-06-12T11:12:02.024504Z" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight']\n", + "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias']\n", + "- This IS expected if you are initializing XLMRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing XLMRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n" + ] + } + ], + "source": [ + "aligner_bert = CustomSentenceAligner(model=\"bert\", token_type=\"bpe\", matching_methods=\"mai\", return_similarity=\"avg\")\n", + "aligner_xlmr = CustomSentenceAligner(model=\"xlmr\", token_type=\"bpe\", matching_methods=\"mai\", return_similarity=\"avg\")\n", + "tagger_bert = NameTBDTagger(aligner=aligner_bert)\n", + "tagger_xlmr = NameTBDTagger(aligner=aligner_xlmr)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Cherry pick samples and analyze alignments" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "ExecuteTime": { + "end_time": "2023-06-12T11:16:01.430049Z", + "start_time": "2023-06-12T11:16:01.392714Z" + } + }, + "outputs": [], + "source": [ + "# select index of a sample to visualize\n", + "idx = 1\n", + "# idx = 5\n", + "# idx = 8\n", + "# idx = 10\n", + "lang_id, src_tokens, mt_tokens, tgt_tokens, 
src_tbd_qe, mt_tbd_qe = get_sample(df_it, idx)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "xlmr" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "ExecuteTime": { + "end_time": "2023-06-12T11:16:02.881106Z", + "start_time": "2023-06-12T11:16:01.742546Z" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Aligning mt-pe: 100%|██████████| 1/1 [00:00<00:00, 4.84it/s]\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "mt_pe_alignments = tagger_xlmr.align_mt_pe([mt_tokens], [tgt_tokens], lang_id)[0]\n", + "draw_aligned_qe(mt_tokens, tgt_tokens, mt_tbd_qe, None, mt_pe_alignments, title='MT - PE (XLMR)')" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "ExecuteTime": { + "end_time": "2023-06-12T11:16:03.680420Z", + "start_time": "2023-06-12T11:16:02.888963Z" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Aligning src-mt: 100%|██████████| 1/1 [00:00<00:00, 4.18it/s]\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "src_mt_alignments = tagger_xlmr.align_source_mt([src_tokens], [mt_tokens], 'eng', lang_id)[0]\n", + "draw_aligned_qe(src_tokens, mt_tokens, src_tbd_qe, mt_tbd_qe, src_mt_alignments, title='SRC - MT (XLMR)')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "bert" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "ExecuteTime": { + "end_time": "2023-06-12T11:16:04.511296Z", + "start_time": "2023-06-12T11:16:03.682058Z" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Aligning mt-pe: 100%|██████████| 1/1 [00:00<00:00, 4.46it/s]\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "mt_pe_alignments = tagger_bert.align_mt_pe([mt_tokens], [tgt_tokens], lang_id)[0]\n", + "draw_aligned_qe(mt_tokens, tgt_tokens, mt_tbd_qe, None, mt_pe_alignments, title='MT - PE (BERT)')" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "ExecuteTime": { + "end_time": "2023-06-12T11:16:05.279593Z", + "start_time": "2023-06-12T11:16:04.510135Z" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Aligning src-mt: 100%|██████████| 1/1 [00:00<00:00, 4.98it/s]\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAACvoAAAH4CAYAAACB/KgGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hU5brG4Wdm0nuFUBJSKKEEIk0poQk2EFABAUURERti77jFsy1gb1uxsBELKKjYFWELGKmC0jskdAIJ6T0zc/5IGIh0CFkzye++rlxnZs2aNe/iHN+zyrO+z2S32+0CAAAAAAAAAAAAAAAAAAAA4FTMRhcAAAAAAAAAAAAAAAAAAAAA4HgEfQEAAAAAAAAAAAAAAAAAAAAnRNAXAAAAAAAAAAAAAAAAAAAAcEIEfQEAAAAAAAAAAAAAAAAAAAAnRNAXAAAAAAAAAAAAAAAAAAAAcEIEfQEAAAAAAAAAAAAAAAAAAAAnRNAXAAAAAAAAAAAAAAAAAAAAcEIEfQEAAAAAAAAAAAAAAAAAAAAnRNAXAAAAAAAAAAAAAAAAAAAAcEIEfQEAAAAAAAAAkqQXX3xR8fHxstlsRpdy1oYOHaohQ4YYXQYAAAAAAAAAVCmCvgAAAAAAAABqjLVr12rQoEFq1KiRvLy81KBBA/Xp00dvvfVWpfWio6NlMpkcf76+vurYsaM+/vjjE243JydHzzzzjNq0aSM/Pz95e3urVatWevTRR7Vv374qq3/BggWOmj799NMTrtOlSxeZTCa1atVKkjRhwoRK+3Kyvx49epzyt3NycjRp0iQ9+uijMpuPXjr+53Z8fX3VokULPfvssyooKKi0jZEjR5709728vE64nyaTSRaLRXXq1NGgQYO0cePGc9qvRx99VF999ZVWr159tv/sAAAAAAAAAOC03IwuAAAAAAAAAACqwuLFi9WzZ09FRUXptttuU0REhHbv3q2lS5fqjTfe0D333FNp/cTERD344IOSpP379+vDDz/UzTffrOLiYt12222O9Xbs2KHevXtr165dGjx4sMaMGSMPDw+tWbNGU6ZM0ezZs7Vly5Yq3RcvLy9Nnz5dN954Y6XlqampWrx4caXQ7LXXXqvGjRs73ufl5enOO+/UNddco2uvvdaxvG7duqf8zf/+978qKyvTsGHDjvusT58+uummmxzbT05O1lNPPaXVq1dr1qxZldb19PTUhx9+eNw2LBbLccvGjRunDh06qLS0VGvWrNHkyZO1YMECrVu37qz366KLLlL79u31yiuvnDSwDQAAAAAAAACuhqAvAAAAAAAAgBrhueeeU2BgoP78808FBQVV+uzgwY
PHrd+gQYNKQdqRI0cqNjZWr732miPoW1ZWpmuvvVZpaWlasGCBunbtetxvTpo0qcr35aqrrtJ3332n9PR0hYWFOZZPnz5ddevWVZMmTZSZmSlJat26tVq3bu1YJz09XXfeeadat259XFD4VKZOnar+/ftXChEf0bRp00rbuuOOO1RSUqKvv/5aRUVFlb7j5uZ2xr+blJSkQYMGOd43a9ZMd955pz7++GM98sgjZ71fQ4YM0dNPP6133nlHfn5+Z1QDAAAAAAAAADgz8+lXAQAAAAAAAADnt337drVs2fK4kK8k1alT57TfDw8PV3x8vLZv3+5Y9tVXX2n16tV68sknjwv5SlJAQICee+6586r7RAYMGCBPT8/jRsudPn26hgwZcsLRcc9HSkqK1qxZo969e5/xdyIiImQymeTmVnXjSSQlJUlSpf8dnI0+ffooPz9fc+fOrbKaAAAAAAAAAMBIBH0BAAAAAAAA1AiNGjXSypUrtW7dunP6fllZmfbs2aPg4GDHsu+++06SNGLEiCqp8Uz5+PhowIABmjFjhmPZ6tWrtX79eg0fPrzKf2/x4sWSpLZt257w86KiIqWnpys9PV07d+7U9OnTNW3aNA0fPvyEQd8j6x77l5OTc9o6UlNTJanS/w7ORosWLeTt7a1Fixad0/cBAAAAAAAAwNlU3VALAAAAAAAAAGCghx56SFdeeaUSExPVsWNHJSUl6dJLL1XPnj3l7u5+3PqlpaVKT0+XJB04cEAvvviiDhw4oLvvvtuxzsaNGxUYGKjIyMhq248jhg8frquvvlq7d+9WZGSkPvvsM8XGxuqSSy6p8t/atGmTJCkmJuaEn0+ZMkVTpkyptGzgwIH64IMPjls3Pz9f4eHhxy2//PLL9csvv1Ralpubq/T0dJWWlmrNmjW67777ZDKZdN11153Tfri5uSkyMlIbNmw4p+8DAAAAAAAAgLMh6AsAAAAAAACgRujTp4+WLFmiF154QXPmzNGSJUv04osvKjw8XB9++KH69+9faf1ff/31uEDqLbfcopdeesnxPicnR/7+/tVS/z9ddtllCgkJ0eeff66HHnpIn3/+uW666aYL8lsZGRlyc3OTn5/fCT8fMGCAxo4dK0kqKCjQ0qVL9dprr2n48OH68ssvZTKZHOt6eXnp+++/P24bYWFhxy0bNWpUpffh4eH65JNP1KFDh3Pel+DgYEeAGwAAAAAAAABcHUFfAAAAAAAAADVGhw4d9PXXX6ukpESrV6/W7Nmz9dprr2nQoEFatWqVWrRo4Vj34osv1rPPPiur1ap169bp2WefVWZmpjw8PBzrBAQEaMeOHedcT0lJiQ4fPlxpWXh4uCwWy2m/6+7ursGDB2v69Onq2LGjdu/ereHDh59zLeejYcOG6t27t+N9//79FRoaqoceekg//PCDrr76asdnFoul0rqn8q9//UtJSUnKy8vT7Nmz9fnnn8tsNp9XrXa7vVLwGAAAAAAAAABc2fldMQUAAAAAAAAAJ+Th4aEOHTro+eef17vvvqvS0lLNmjWr0jphYWHq3bu3Lr/8cj344IP69NNP9c033+iNN95wrBMfH6/s7Gzt3r37nOpYvHix6tWrV+nvbLY1fPhwrVq1ShMmTFCbNm0qBZWrUmhoqMrKypSbm3vG37n00kslSb///vs5/25CQoJ69+6tgQMHatq0aerfv79uu+22c/73lqTMzMwTjh4MAAAAAAAAAK6IoC8AAAAAAACAGq19+/aSpP37959yvb59+6p79+56/vnnlZ+fL0mOkWo//fTTc/rtNm3aaO7cuZX+IiIizvj7Xbt2VVRUlBYsWHBBR/ONj4+XJKWkpJzxd8rKyiRJeXl5VVbHxIkTVVRUpOeee+6cvl9WVqbdu3erefPmVVYTAAAAAAAAABiJoC8AAAAAAACAGmH+/Pmy2+3HLf/pp58kSc2aNTvtNh599FFlZGTogw8+kCQNGjRICQkJeu6557RkyZLj1s
/NzdWTTz550u0FBwerd+/elf68vLzOdJdkMpn05ptv6umnn9aIESPO+Htnq1OnTpKkFStWnPF3vv/+e0nlYeaqEhcXp+uuu04fffSRDhw4cNbf37Bhg4qKitS5c+cqqwkAAAAAAAAAjORmdAEAAAAAAAAAUBXuueceFRQU6JprrlF8fLxKSkq0ePFiffHFF4qOjtYtt9xy2m1ceeWVatWqlV599VXdfffdcnd319dff63evXurW7duGjJkiLp06SJ3d3etX79e06dPV3Bw8DmPQHsmBgwYoAEDBlyw7UtSbGysWrVqpXnz5mnUqFHHfb5lyxbHqMYFBQVaunSppk2bpsaNGx8XQC4rKzvpCMjXXHONfH19T1nLww8/rJkzZ+r111/XxIkTz2o/5s6dKx8fH/Xp0+esvgcAAAAAAAAAzoqgLwAAAAAAAIAa4eWXX9asWbP0008/6f3331dJSYmioqJ01113afz48QoKCjqj7Tz00EMaOXKkPvvsM40cOVKNGzfWqlWr9Nprr2n27Nn65ptvZLPZ1LhxY40ePVrjxo27sDtWTUaNGqV//etfKiwslLe3d6XP5s6dq7lz50qSLBaL6tWrp9GjR+vf//73ccHd4uLik44+nJKSctqgb/v27dWjRw+9++67evzxxxUYGHjG+zBr1ixde+218vf3P+PvAAAAAAAAAIAzM9lPNJcdAAAAAAAAAKBWyc7OVmxsrF588UXdeuutRpdz1latWqW2bdvqr7/+UmJiotHlAAAAAAAAAECVIOgLAAAAAAAAAJAkTZo0SVOnTtWGDRtkNpuNLuesDB06VDabTTNnzjS6FAAAAAAAAACoMgR9AQAAAAAAAAAAAAAAAAAAACfkWkMyAAAAAAAAAAAAAAAAAAAAALUEQV8AAAAAAAAAAAAAAAAAAADACRH0BQAAAAAAAAAAAAAAAAAAAJwQQV8AAAAAAAAAAAAAAAAAAADACbmdyUo2m0379u2Tv7+/TCbTha4JAAAAAAAAAAAAAAAAAAAAqJHsdrtyc3NVv359mc2nHrP3jIK++/btU2RkZJUUBwAAAAAAAAAAAAAAAAAAANR2u3fvVsOGDU+5zhkFff39/R0bDAgIOP/KAAAAAAAAAAAAAAAAAAAAgFooJydHkZGRjnzuqZxR0NdkMkmSAgICCPoCAAAAAAAAAAAAAAAAAAAA5+lIPvdUzNVQBwAAAAAAAAAAAAAAAAAAAICzRNAXAAAAAAAAAAAAAAAAAAAAcEIEfQEAAAAAAAAAAAAAAAAAAAAnRNAXAAAAAAAAAAAAAAAAAAAAcEIEfQEAAAAAAAAAAAAAAAAAAAAnRNAXAAAAAAAAAAAAAAAAAAAAcEIEfQEAAAAAAAAAAAAAAAAAAAAnRNAXAAAAAAAAAAAAAAAAAAAAcEIEfQEAAAAAAAAAAAAAAAAAAAAnRNAXAAAAAAAAAAAAAAAAAAAAcEIEfQEAAAAAAAAAAAAAAAAAAAAnRNAXAAAAAAAAAAAAAAAAAAAAcEIEfQEAAAAAAAAAAAAAAAAAAAAnRNAXAAAAAAAAAAAAAAAAAAAAcEIEfQEAAAAAAAAAAAAAAAAAAAAnRNAXAAAAAAAAAAAAAAAAAAAAcEIEfQEAAAAAAAAAAAAAAAAAAAAnRNAXAAAAAAAAAAAAAAAAAAAAcEIEfQEAAAAAAAAAAAAAAAAAAAAnRNAXAAAAAAAAAAAAAAAAAAAAcEIEfQEAAAAAAAAAAAAAAAAAAAAnRNAXAAAAAAAAAAAAAAAAAAAAcEIEfQEAAAAAAAAAAAAAAAAAAAAnRNAXAAAAAAAAAAAAAAAAAAAAcEIEfQEAAAAAAAAAAAAAAAAAAAAnRNAXAAAAAAAAAAAAAAAAAAAAcEIEfQEAAAAAAAAAAAAAAAAAAAAnRNAXAAy2dOlSXXLJJY73l112mebNm2dgRQAAAAAAAAAAAAAAAAAAZ0DQFwCqyciRI3Xfffcdt/zRRx/Vk08+6Xj/5J
NP6uGHH67GygDURAcPHlSnTp3k6+urF198sdp+9/3335evr6/at2+v3bt3V9vvAnB99C0A1cGoXuPM6IOAc6NvHY++BdQenCcCcDX0LQCuhr4FAK6DoC8AGGjdunXavHmzrrrqKseybt26KSsrS4sWLTKwMgCububMmcrJydGhQ4f0yCOPSJJ69OihBQsWONbJzc3V/fffr8jISHl7eysuLk7/93//p7KyMsc6H330kRITEx3vrVarRo0apRYtWmjPnj1asGCBevTo4fh8zJgxyszMlJubmz7++OMLvZsAahD6FoDqcLpes2DBAplMJvn5+cnPz0/h4eEaPny4Dh8+fNy2/u///k8mk0k///xzpeWpqamObQQEBCgsLEw9e/bURx99JLvdfsr6fv75Z3Xs2FGBgYEKDg5Whw4d9NNPP1XablZWVqXv/LPvRUdHy9vb2/H77du31/z58x2f0wcB10Lfom8BtRnniQBcDX0LgKuhbwGA6yDoCwDn4NVXX1VUVJT8/f0VHR2tDz/8UJI0b948dezYUUFBQWrZsqW+++47SdKbb76pzz77TO+88478/PzUsmVLSdJ3332nbt26yWKxOLZtMpnUq1cvx3cB4FxkZGSoSZMm8vHxOeHnpaWluvzyy/X3339r7ty5ysvL08yZM/Xll19q2LBhJ/xOcXGxBg0apHXr1ik5OVkNGzY84XoeHh6Kj49Xenp6le0PgJqPvgWgOpyu10hSYGCg8vLylJeXpy1btig9PV2PPvpopXXsdrumTp2qkJAQTZky5YTb2bNnj3JycrR792499NBDeuaZZ3T77bef9He3b9+uwYMH64knntDhw4e1f/9+vfzyy/L39z/r/ZwxY4by8vKUlZWl0aNHa8CAASoqKjrp+vRBwHnRt06MvgXUDpwnAnA19C0Aroa+BQCuw83oAgDA1WzZskXjx4/XX3/9pfj4eKWlpSktLU1r1qzR4MGD9dVXX6lHjx5avHix+vbtq+XLl2vcuHH666+/FBQUpNdff92xrVWrVik+Pv6432jRooV+/fXXatwrADVNWVmZzOaTP9P12WefafPmzdqxY4cCAwMlSe3atdPs2bPVvHnz456szcvL04ABA2S32/W///3vtDduzWZzpSd5AeB06FsAqsPpes0/BQcHa+DAgfr0008rLf/f//6nvXv3avr06Ro+fLgOHTqk8PDwE27D29tbffv2VXBwsLp27ar77rtPLVq0OG69v//+W3Xr1tXAgQMlSRaLRd27dz/znTsBs9msm266SXfeead27dqlpk2bnnJd+iDgfOhb9C2gNuM8EYCroW8BcDX0LQBwHYzoCwBnyWKxyG63a/369SosLFTdunXVunVrvffeexo5cqR69eols9msrl27ql+/fpo5c+ZJt5WZmamAgIDjlgcEBCgzM/NC7gaAGiwvL0/JycmKjo6utPzYk+05c+aob9++jpPyI+Li4nTxxRdXetggNzdXvXr1kr+/v37++edKJ+X/nL7niKioKC1ZskTZ2dlVtl8Aai76FoDqcCa95p/S09P19ddfq0uXLpWWT5kyRf369dN1112n+vXr65NPPjnt73fu3Fn169fXwoULT/h5u3bttG/fPt1555365ZdfdPjw4TPar1MpKyvT1KlT1aBBA8d+0wcB10HfipZE3wJqK84TAbga+hYAV0PfAgDXQtAXAM5SXFycpk2bprffflt169bVZZddplWrVik1NVWTJ09WUFCQ4+/bb7/Vvn37Trqt4OBg5eTkHLc8JydHwcHBF3I3ANRQn3zyiQICArRnzx498cQTJ10vPT1d9evXP+Fn9evX16FDhxzv09LStHLlSt1yyy3y9PQ8ozoefPBBFRUVHTeSOQD8E30LQHU4014jSdnZ2Y5zujp16mjv3r269957HZ8fPnxYs2fP1s033yyTyaQRI0ZoypQpZ1RHgwYNThqEi4mJ0aJFi5SXl6fRo0crPDxcffr00Y4dO8
58RyvccMMNCgoKkq+vrx588EFNnDhRHh4ep/wOfRBwLvQt+hZQm3GeCMDV0LcAuBr6FgC4HoK+AHAOhgwZovnz5ystLU1t2rTRiBEjFBkZqXvvvVdZWVmOv7y8PL377ruSdMIpLxITE7Vp06bjlm/YsEGJiYkXejcA1EAjRoxQRkaGgoKCNHny5JOuFxYWdtIHEfbt21dpCtfGjRvro48+0rBhw/TDDz+cUR1Tp06V1WrVwYMHdd99953VPgCoXehbAKrDmfYaSQoMDHSc0xUWFurWW29Vt27dVFRUJKl8ysKAgABdddVVkqSbbrpJGzZs0NKlS09bx969exUSEqJdu3bJz8/P8bdr1y5JUtu2bfXJJ59oz5492rJli+x2u2688UZJkru7uySptLS00jZLS0sdnx3x2WefKSsrS0VFRVqyZIkefvhh/fLLL6esjT4IOBf6Fn0LqM04TwTgauhbAFwNfQsAXA9BXwA4S5s3b9bcuXNVWFgoDw8P+fn5yc3NTbfffrumTp2q+fPny2q1qri4WEuWLNHGjRslSXXr1tWOHTtkt9sd2+rXr5+Sk5NltVor/cb8+fPVr1+/at0vADVHcHCw+vTpozVr1px0nT59+uinn346blTxlJQULVu2TH369Km0fMSIEfrggw80ZMgQfffdd6etYd26derRo0elE3wAOBn6FoDqcCa95p88PT11xx13KCUlRevXr5ckTZkyRdnZ2YqMjFRERISSkpJkMplOOzrmkiVLtG/fPnXv3l1RUVHKy8tz/EVFRR23flxcnO69916tXbtWkhQRESEPDw+lpKRUWm/79u3HTbF4hMlk0kUXXaQuXbroxx9/PGV99EHA+dC36FtAbcZ5IgBXQ98C4GroWwDgWgj6AsBZKikp0VNPPaW6desqNDRUv/32mz766CNddNFFmjFjhsaPH6/w8HA1aNBATz31lIqLiyVJo0ePdoyC0rp1a0lS69at1aRJE/3888+O7ScnJysgIEBJSUmG7B+AmsHT01MlJSUn/fzGG29UXFycBg4cqM2bN8tqteqvv/7SNddco379+qlnz57HfeeGG27Qf//7Xw0dOlTffPPNKX+/pKTkjKflAQCJvgWgepyu1/xTWVmZPvjgA/n4+Cg2NlYrV67U6tWrNXfuXK1atcrx99577+mLL75Qfn7+cdsoKirSzz//rBtvvFGjR49WixYtTvhbycnJeueddxyjpBw4cEAffPCBOnfuLEmyWCwaNmyYxo8fr3379slms2nx4sWaMmWKbrjhhpPuw9q1a5WcnKyEhIRT7it9EHBO9K2To28BNR/niQBcDX0LgKuhbwGA6yDoCwBnKSEhQUuXLlVOTo6ysrK0cOFCtWnTRpLUq1cvLVq0SIcPH1Z6errmzZunxMRESeWjmqxcuVKZmZmVnoqbNGmSnn32Wcf7Z599Vi+99FK17hOAmsdsNstms530cw8PD82dO1cJCQnq1auXfH19NWjQIA0YMEBffPHFSb83dOhQffTRRxo+fLi++uqrk65ntVplsVjOax8A1C70LQDV4XS9RpKys7MdU9OHhYVp1qxZ+v777xUcHKwpU6aoR48e6tatmyIiIhx/I0eOlJ+fX6V+1LBhQwUEBKhhw4aaNGmSxo8fr/fee++kvxscHKw5c+aoXbt28vX1Vdu2bRUcHKxp06Y51nnzzTcVHx+vTp06KSgoSLfffrsmTpyogQMHVtrWsGHDHPvQv39/3XnnnbrttttOud/0QcA50bdOjr4F1HycJwJwNfQtAK6GvgUArsNkP3YO+ZPIyclRYGCgsrOzFRAQUB11AQAA4Dy8//77eumll7Ry5cpqP34rKChQly5ddMMNN+ihhx6q1t8G4LroWwCqg5G9xpnRBwHnRd86MfoWUDtwngjA1dC3ALga+hYAGOtscrmM6AsAAFADDR48WI0bN1Z0dLRefvnlavvdDz74QA0aNFBISIhGjBhRbb8LwPXRtwBUB6
N6jTOjDwLOjb51PPoWUHtwngjA1dC3ALga+hYAuA5G9AUAAAAAAAAAAAAAAAAAAACqCSP6AgAAAAAAAAAAAAAAAAAAAC6OoC8AAAAAAAAAAAAAAAAAAADghAj6AgAAAAAAAAAAAAAAAAAAAE6IoC8AAAAAAAAAAAAAAAAAAADghAj6AgAAAAAAAAAAAAAAAAAAAE6IoC8AAAAAAAAAAAAAAAAAAADghAj6AgAAAAAAAAAAAAAAAAAAAE6IoC8AAAAAAAAAAAAAAAAAAADghAj6AgAAAAAAAAAAAAAAAAAAAE6IoC8AAAAAAAAAAAAAAAAAAADghAj6AgAAAAAAAAAAAAAAAAAAAE6IoC8AAAAAAAAAAAAAAAAAAADghAj6AgAAAAAAAAAAAAAAAAAAAE6IoC8AAAAAAAAAAAAAAAAAAADghAj6AgAAAAAAAAAAAAAAAAAAAE6IoC8AAAAAAAAAAAAAAAAAAADghAj6AgAAAAAAAAAAAAAAAAAAAE6IoC8AAAAAAAAAAAAAAAAAAADghAj6AgAAAAAAAAAAAAAAAAAAAE6IoC8AAAAAAAAAAAAAAAAAAADghAj6AgAAAAAAAAAAAAAAAAAAAE6IoC8AAAAAAAAAAAAAAAAAAADghAj6AgAAAAAAAAAAAAAAAAAAAE7IzegCAMBIdrtdVrvRVVQts0kym0xGlwHgDNXEPnSh0eeAmseZeqHFJJnoMQBOwZl61hH0LqB2csZ+VNXobwBOxWq3y36efZDrTADOh81ul83Jj8c4ngJQ1U52LmoySRb6DQBcMAR9AdRKBwrK9Mf+AqXmlqjMyU/Az0W4l0UXhXmpbbi30aUAOIlt2SVadrBAe/LKVAPb0AUX6mlR61BPdazjzUVKwIWlFZTpjwMFSslxnmMyk6RIP3d1quutmAAPo8sB4ER25ZZqcVqBduWVOt2NXHoXULvsyCnR0rRC7c4rrfHnk2aTFOXnrs51fRTl7250OQCcQKnNroX78rUpq0R5pbYq2Wawp1kJIV7qVJfrTABOz263a/nBQq3JKFZGsdXock7LJKmhn5suruOjxoGcLwI4d3vzS7Vof4F25pWe9KFTXzeTmgV5qnt9H3lamGQeAKqSyW4//bOuOTk5CgwMVHZ2tgICAqqjLgC4YA4XWTVtS5b83M1KCPGUn3vNOsAss0kpuSXanFWiXg181bEOYV/A2aTklGjm9hw18HVTfLCnvCzcQDgbVnt50GZ9ZrE61/VWt/q+RpcE4BxkFls1bXOWfNzMah3qPMdkhWV2bcws1oHCMg1tHKgoPwIlAKR9+aWavjVbYV5uahHiKR835zp+Kyqza2NWsfYXlOn6uAA18ufmLVBT7cwt0Rfbc1TPx03Ngzzl5WT9qKoVlNm14XCx0ovKNLxJoOr7cmwG1HYzt2Vrd36p2oR6qY63m8zn2QatdmlPXqnWHi5W+3Av9W7oVzWFAqixft+Xr8VphWoZ7Kkof3c5++X9IqtdmzKLtTe/TEPiAng4FMA5OVhYpk+2ZCnY06KWwZ7yPcH1fLtdOlRk1er0IoV7W3RDk0AeogKA0zibXC4j+gKoddZkFMksaUTTQHnV0KfIEsO89NPOXP15sFAdwr04gAaczIpDhYrwcdPwJoFMDXiO2oR6ydfdrJWHitQ5wkdu53tXB0C1W5tRJLukm5oGysvNuY7J2oZ76aNNWfrrUCFBXwCSpL/Si+TvYdaNTQOd9rjjonAvTducpb/Siwj6AjXYykNFCvOyaHiTwFozJWrbMC9N2ZSpv9OLCPoCtdyhwjLtyC3VgGh/NQ/2rLLttgn1UoCHWcvSCtWtnq88nD21B8AwZTa7Vh4q0sV1vNWzgesMQNE2zEufbMnWikOFBH0BnJNV6UXyspg1ommQ3E9zbayRn7tm7cjR/oIyzuEAoAo5191UAKgGewtKFe3vXmNDvkc0DfJUbqlNOVU0fRmAqrM3v0xNAj0I+Z6nZkEeKrbZlVHk/NOjAT
jevoIyNfJzd7qQryRZTCY1DvTQ3vwyo0sB4CT25ZcpNsDDaUO+UkXvCvDQ3jx6F1CT7csvU+MAj1oT8pUkN7NJsQEcmwEoP4+UpCYXYOr5ZkGeKrOXj1YHACeTUWRVsc2upkGuFZY1c60LwHnam1+q2AD304Z8JSkmwF1uJmkPPQcAqpTz3VEFgAuszKYL+kT+hAkTjhtBNzo6WiNHjrxgv3kiHhUH2VZyvoDTKbPZHf+NuroePXqoR48ehvz2kX/DMrvdkN8HcH7KbPZzPiZbsGCBTCaTFixYULVFHcPDbKK/AHAos9vlWQ3Hb2d6bHWyPuhhoXcBNV2Z/dyPof7JZDJpwoQJjvdHrmmlp6dXyfarkifHZgBUfh5pMemCPHzFdSYAZ+JIj3DF6/seZpPKbPQ4AOemzH7mvc9sMsndbJKVngMAVYqgLwBU+Oijj2QymU7499hjjxld3tlzvWsMAJzUhg0bNGHCBKWmphpdCgAAAADgBBYvXqwJEyYoKyvL6FIqcda6ANQs9BoARjtyj3HFihVGl3JS3DYEUK1oOgBQ5dyMLgAAnM3//d//KSYmptKyVq1aGVQNABhvw4YNeuaZZ9SjRw9FR0cbXQ4AAECN9euvvxpdAgAXtXjxYj3zzDMaOXKkgoKCzug7hYWFcnO7sLcIzqUuADhb9BoAAAAAQE1H0BcA/uHKK69U+/btjS4DAM6J3W5XUVGRvL29jS7lrNlsNpWUlMjLy8voUgAAAAzh4eFhdAkAarhjz7s49wIAAAAAAABcg9noAgDAlfz8889KSkqSr6+v/P391bdvX61fv/6ctrVjxw4NHjxYISEh8vHx0SWXXKIff/zR8bndbldYWJgeeOABxzKbzaagoCBZLJZK05BNmjRJbm5uysvLO+d9A2CsNWvWyGQy6bvvvnMsW7lypUwmk9q2bVtp3SuvvFIXX3yxJCk6Olr9+vXTnDlz1L59e3l7e+u9996TJGVlZem+++5TZGSkPD091bhxY02aNEk2m63S9j7//HO1a9dO/v7+CggIUEJCgt544w1J5VOODR48WJLUs2dPmUwmmUwmLViw4KT7UlxcrKefflqNGzeWp6enIiMj9cgjj6i4uLjSeiaTSWPHjtVnn32mli1bytPTU7/88stpawJQc+3cuVN33XWXmjVrJm9vb4WGhmrw4MFKTU097Xe3bt2q6667ThEREfLy8lLDhg01dOhQZWdnO9YpKyvTv//9b8XFxcnT01PR0dF64oknjutPAHCmJkyYIJPJpE2bNmnIkCEKCAhQaGio7r33XhUVFTnWmzp1qnr16qU6derI09NTLVq00Lvvvnvc9nr06KEePXpUWrZnzx4NHDhQvr6+qlOnju6//376FoBKJkyYoIcffliSFBMT4zhvS01NPeV5l8lk0oQJE47bXnp6+il72pHtfvTRR8d999htnqquIz799FO1a9dO3t7eCgkJ0dChQ7V79+6q+YcBUCucqtdwDgjAWYwcOfKEs+UdOac81pHjt2+++UatWrWSp6enWrZs6TiGO+J8rqMBAADA9TCiLwD8Q3Z2ttLT0ystCwsL0yeffKKbb75Zl19+uSZNmqSCggK9++676tq1q/7++++zms4+LS1NnTt3VkFBgcaNG6fQ0FBNmzZN/fv315dffqlrrrlGJpNJXbp00e+//+743po1a5SdnS2z2axFixapb9++kqTk5GRddNFF8vPzq5J/AwDVr1WrVgoKCtLvv/+u/v37Syr/b9tsNmv16tXKyclRQECAbDabFi9erDFjxji+u3nzZg0bNky33367brvtNjVr1kwFBQXq3r279u7dq9tvv11RUVFavHixHn/8ce3fv1+vv/66JGnu3LkaNmyYLr30Uk2aNEmStHHjRi1atEj33nuvunXrpnHjxunNN9/UE088oebNm0uS43/+k81mU//+/fXHH39ozJgxat68udauXavXXntNW7Zs0TfffFNp/d9++00zZ8
7U2LFjFRYWpujo6NPWBKDm+vPPP7V48WINHTpUDRs2VGpqqt5991316NFDGzZskI+Pzwm/V1JSossvv1zFxcW65557FBERob179+qHH35QVlaWAgMDJUmjR4/WtGnTNGjQID344INatmyZXnjhBW3cuFGzZ8+uzl0FUMMMGTJE0dHReuGFF7R06VK9+eabyszM1McffyxJevfdd9WyZUv1799fbm5u+v7773XXXXfJZrPp7rvvPul2CwsLdemll2rXrl0aN26c6tevr08++US//fZbde0aABdw7bXXasuWLZoxY4Zee+01hYWFSZLCw8Mlnfi861RO19Oqqq7nnntOTz31lIYMGaLRo0fr0KFDeuutt9StWzf9/fffCgoKOrt/CAC10ql6DeeAAFzVH3/8oa+//lp33XWX/P399eabb+q6667Trl27FBoaKuncr6MBAADANRH0BYB/6N2793HLcnNzNW7cOI0ePVrvv/++Y/nNN9+sZs2a6fnnn6+0/HQmTpyotLQ0JScnq2vXrpKk2267Ta1bt9YDDzygAQMGyGw2KykpSY899phyc3Pl7++v5ORkNWrUSHXr1lVycrL69u0rm82mRYsW6ZZbbjn/nQdgGLPZrC5duig5OdmxLDk5WQMHDtS3336rxYsX64orrnCEfpOSkhzrbdu2Tb/88osuv/xyx7Jnn31W27dv199//60mTZpIkm6//XbVr19fL730kh588EFFRkbqxx9/VEBAgObMmSOLxXJcXbGxsUpKStKbb76pPn36HDfC3D9Nnz5d8+bN08KFCx39TSoPMt9xxx1avHixOnfu7Fi+efNmrV27Vi1atHAsu++++05ZE4Caq2/fvho0aFClZVdffbU6deqkr776SiNGjDjh9zZs2KCUlBTNmjWr0vf/9a9/OV6vXr1a06ZN0+jRo/XBBx9Iku666y7VqVNHL7/8subPn6+ePXtegL0CUBvExMTo22+/lSTdfffdCggI0DvvvKOHHnpIrVu31sKFC+Xt7e1Yf+zYsbriiiv06quvnjLo+/7772vLli2aOXOmY5aF2267TW3atLmwOwTApbRu3Vpt27bVjBkzNHDgwOOCvCc67zqV0/W0qqhr586devrpp/Xss8/qiSeecCy/9tprddFFF+mdd96ptBwATuZkvYZzQACubOPGjdqwYYPi4uIklc+216ZNG82YMUNjx46VdO7X0QAAAOCazEYXAADO5j//+Y/mzp173F9WVpaGDRum9PR0x5/FYtHFF1+s+fPnn9Vv/PTTT+rYsWOlEJyfn5/GjBmj1NRUbdiwQZKUlJQkq9WqxYsXSyoP/SUlJSkpKckRBly3bp2ysrIqhf4AuKakpCT99ddfys/Pl1T+1P5VV12lxMREx3/zycnJMplMlfpHTExMpZCvJM2aNUtJSUkKDg6u1Ld69+4tq9XqGC08KChI+fn5mjt3bpXsw6xZs9S8eXPFx8dX+t1evXpJ0nH9snv37sfdbK7qmgC4jmNDcKWlpcrIyFDjxo0VFBSkv/7666TfOzJi75w5c1RQUHDCdX766SdJ0gMPPFBp+YMPPihJ+vHHH8+rdgC12z/Duvfcc4+ko73n2P52ZBaZ7t27a8eOHcrOzj7pdn/66SfVq1ev0s1bHx+fSrM7AMDpnOi861RO19Oqwtdffy2bzaYhQ4ZUOneMiIhQkyZNzvpaGwD8E+eAAFxZ7969HSFfqfyhhoCAAO3YscOx7FyvowEAAMA1MaIvAPxDx44d1b59+0rLXnzxRUlyBNX+KSAg4Kx+Y+fOnbr44ouPW968eXPH561atVLbtm3l4+Oj5ORkXX755UpOTtYzzzyjiIgIvfXWWyoqKnKE/44N/QFwTUlJSSorK9OSJUsUGRmpgwcPKikpSevXr68U9G3RooVCQkIc34uJiTluW1u3btWaNWscU6L+08GDByWVj2Qyc+ZMXXnllWrQoIEuu+wyDRkyRFdcccU57cPWrVu1ce
PG0/7uqWqv6poAuI7CwkK98MILmjp1qvbu3Su73e747FRBuJiYGD3wwAN69dVX9dlnnykpKUn9+/fXjTfe6AgB79y5U2azWY0bN6703YiICAUFBWnnzp0XZqcA1ApHZlA4Ii4uTmazWampqZKkRYsW6emnn9aSJUuOeyAhOzvb0av+aefOnWrcuLFMJlOl5c2aNau64gHUeCc67zqV0/W0qrB161bZ7fbjfusId3f3KvstALUT54AAXFlUVNRxy4KDg5WZmel4f67X0QAAAOCaCPoCwBmw2WySpE8++UQRERHHfe7mdmHaqbu7uy6++GL9/vvv2rZtmw4cOKCkpCTVrVtXpaWlWrZsmZKTkxUfH3/SUB0A19G+fXt5eXnp999/V1RUlOrUqaOmTZsqKSlJ77zzjoqLi5WcnKxrrrmm0veOfXL/CJvNpj59+uiRRx454W81bdpUklSnTh2tWrVKc+bM0c8//6yff/5ZU6dO1U033aRp06ad9T7YbDYlJCTo1VdfPeHnkZGRp629qmsC4DruueceTZ06Vffdd586deqkwMBAmUwmDR061HE8djKvvPKKRo4cqW+//Va//vqrxo0bpxdeeEFLly5Vw4YNHev9MywHABfCsb1m+/btuvTSSxUfH69XX31VkZGR8vDw0E8//aTXXnvttP0NAM7Xic67zsY/j59OdjxltVrPeJs2m00mk0k///yzLBbLcZ/7+fmdXZEAcBKcAwJwBmd7/HSi4yNJlcK853MdDQAAAK6HoC8AnIEj0+PUqVNHvXv3Pu/tNWrUSJs3bz5u+aZNmxyfH5GUlKRJkyZp3rx5CgsLU3x8vEwmk1q2bKnk5GQlJyerX79+510TAON5eHioY8eOSk5OVlRUlJKSkiSV94Hi4mJ99tlnSktLU7du3U67rbi4OOXl5Z1Rz/Lw8NDVV1+tq6++WjabTXfddZfee+89PfXUUyccQe50v7t69Wpdeuml53Uj5XQ1AaiZvvzyS91888165ZVXHMuKioqUlZV1Rt9PSEhQQkKCxo8fr8WLF6tLly6aPHmynn32WTVq1Eg2m01bt251zKIgSWlpacrKyqp0/AUAZ2vr1q2VRszctm2bbDaboqOj9f3336u4uFjfffddpVGZzmRa+kaNGmndunWy2+2Vjq1OdD4JoHaryiDbqXqaVD6anKTjjtFONDrmyeqKi4uT3W5XTEyM40FUADhXJ+o1nAMCcCbBwcEnvL51PqOLn+91NAAAALgWs9EFAIAruPzyyxUQEKDnn39epaWlx31+6NChs9reVVddpeXLl2vJkiWOZfn5+Xr//fcVHR2tFi1aOJYfCfi9/vrr6tq1q+OiZVJSkj755BPt27fPEQYE4PqSkpK0bNkyzZ8/3/HfdlhYmJo3b65JkyY51jmdIUOGaMmSJZozZ85xn2VlZamsrEySlJGRUekzs9ms1q1bS5KKi4slSb6+vo7vncnv7t27Vx988MFxnxUWFio/P/+02ziTmgDUTBaLpdLIJJL01ltvnXZ0uJycHEdfOyIhIUFms9nRN6666ipJ0uuvv15pvSMjkPft2/d8SgdQy/3nP/+p9P6tt96SJF155ZWOkZj+OY3q1KlTT7vdq666Svv27dOXX37pWFZQUKD333+/KsoGUIOczXnb6Zyqp0lSQECAwsLC9Pvvv1da75133jnjuq699lpZLBY988wzxx3/2e32484LAeBUTtRrOAcE4Ezi4uKUnZ2tNWvWOJbt379fs2fPPudtnut1NACoSgUFBdq0aZPS09ONLgUAajxG9AWAMxAQEKB3331XI0aMUNu2bTV06FCFh4dr165d+vHHH9WlSxe9/fbbZ7y9xx57TDNmzNCVV16pcePGKSQkRNOmTVNKSoq++uormc1Hn8Po1KmT3NzctHnzZo0ZM8axvFu3bnr33XclnVnoD4BrSEpK0nPPPafdu3dX+m+7W7dueu+99xQdHV1pCvqTefjhh/Xdd9+pX7
9+GjlypNq1a6f8/HytXbtWX375pVJTUxUWFqbRo0fr8OHD6tWrlxo2bKidO3fqrbfeUmJiomO0k8TERFksFk2aNEnZ2dny9PRUr169VKdOneN+d8SIEZo5c6buuOMOzZ8/X126dJHVatWmTZs0c+ZMzZkzR+3btz9l7WdSE4CaqV+/fvrkk08UGBioFi1aaMmSJZo3b55CQ0NP+b3ffvtNY8eO1eDBg9W0aVOVlZXpk08+kcVi0XXXXSdJatOmjW6++Wa9//77ysrKUvfu3bV8+XJNmzZNAwcOVM+ePatjFwHUUCkpKerfv7+uuOIKLVmyRJ9++qmGDx+uNm3ayMvLyzFbwe233668vDx98MEHqlOnjvbv33/K7d522216++23ddNNN2nlypWqV6+ePvnkE/n4+FTTngFwFe3atZMkPfnkkxo6dKjc3d119dVXn9O2TtXTjhg9erQmTpyo0aNHq3379vr999+1ZcuWM64rLi5Ozz77rB5//HGlpqZq4MCB8vf3V0pKimbPnq0xY8booYceOqf6AdQ+J+s1nAMCqG7//e9/9csvvxy3fMSIEXr00Ud1zTXXaNy4cSooKNC7776rpk2b6q+//jqn3zrX62gAUJWWL1+unj176umnn9aECROMLgcAajSCvgBwhoYPH6769etr4sSJeumll1RcXKwGDRooKSlJt9xyy1ltq27dulq8eLEeffRRvfXWWyoqKlLr1q31/fffHzeSgK+vry666CL9+eef6tq1q2P5kQBgZGQk04wBNUjnzp1lsVjk4+NT6SZqUlKS3nvvvTMO9vv4+GjhwoV6/vnnNWvWLH388ccKCAhQ06ZN9cwzzygwMFCSdOONN+r999/XO++8o6ysLEVEROj666/XhAkTHA8dREREaPLkyXrhhRd06623ymq1av78+ScM+prNZn3zzTd67bXX9PHHH2v27Nny8fFRbGys7r333jOakvVMagJQM73xxhuyWCz67LPPVFRUpC5dumjevHm6/PLLT/m9Nm3a6PLLL9f333+vvXv3Onrozz//rEsuucSx3ocffqjY2Fh99NFHmj17tiIiIvT444/r6aefvtC7BqCG++KLL/Svf/1Ljz32mNzc3DR27Fi99NJLkqRmzZrpyy+/1Pjx4/XQQw8pIiJCd955p8LDwzVq1KhTbtfHx0f/+9//dM899+itt96Sj4+PbrjhBl155ZW64oorqmPXALiIDh066N///rcmT56sX375RTabTSkpKee0rVP1tCP+9a9/6dChQ/ryyy81c+ZMXXnllfr555+PO088WV2+vr567LHH1LRpU7322mt65plnJJVf57rsssvUv3//c/uHAFArnazXcA4IoLodGaDnn0aOHKnZs2frgQce0COPPKKYmBi98MIL2rp16zkHfc/1OhoAAABck8n+z/kcTiAnJ0eBgYHKzs5WQEBAddQFABfMtM1ZquNt0ZVR/kaXckHtyivV9K3ZGtM8WCFeFqPLAXCMl1elq0d9X7Wv4210KS7tUGGZpmzK0oimgWrg6250OQDO0qdbshTkaVG/Rs55TLbkQIGWHyrUvQmMggJAemf9YbUK9lS3+r6Vlk+YMEHPPPOMDh06pLCwMIOqO2ppWoGWphXqvtb0LqCmen1Nhi6p661L6taukb1/35evdZnFuqtliNGlADDQykOF+m1vvh5OrPrjrqxiqyZvyNTQxgGK9veo8u0DqBn25pfqky3ZujU+SOHerjWm2oqDhVqwL18PXYAeCqDm+2BjpmL93XVpQ78zWv+NtRnqGO6tThG169wVAM7W2eRyGRINAAAAAAAAAAAAAAAAAAAAcEIEfQEAAAAAAAAAAAAAAAAAAAAnRNAXAGoqu9EFAAAAnJjNbldOUZnRZQAAAAAAAAC1ArcNAVQrmg4AVDmCvgBqHTezVGKt+UeWJbbyfbTQ6QGn42Y2Of4bxbk78m/oZjIZXAmAM2Wz25W8PUNjZ63TrsxCFTvxMV
mJzU5/AeDgZjKp+ATHbxMmTJDdbldYWJgBVR2vxErvAmo6N5OpVlzX+qdijs0AqPyamtUulV2A62pcZwJwJo70CFe8vl9is8vNTI8DcG7cTGfe+2x2u0ptdlnoOQBQpYh/Aah1Gvi4KzW3VEVWm9GlXFBbsorl725WgDutHnA2DXzdtDW7RDa7610MdCabs0rkaTYp1MtidCkATsNqs2vhtgzdPWudJs7brp2ZhcrJLVJqTomKypzvmMxqt2tbdoka+LoZXQoAJ1Hf1007ckouSKikqljtdm3LKVEDP3oXUJPV93XTtpwSWWvR+WSZza4dORybAZDq+5T3ga3ZJVW+7c1ZxXIzSXW86TUATi7UyyJPs0lbsqq+D11INq51AThPDXzdtSOnVKVncG0sJadUZXapIT0HAKoUXRVArdM61Et/ZxTpky3ZSgjxlF8NC8KW2aSU3BJtzipRrwa+MjECAeB02od7a+b2HE3fmq34YE95Wfjv9GxYbdKuvFKtzyxW57rejEIAODGrrXwE38//2qfdWUWVPvNTecD34y1ZSgjxkr+HcxyTFZbZtTGzWBnFVvWJ9DO6HABOom2YlzZlFuvTLdlqEeIpHzfnOv4oLLNrU1ax0ousurSBr9HlALiA2oV76Ysj55NBnvJ2sn5U1QrK7NpwuFi5JTZd1MjL6HIAGCzc202x/u76aVeu9uSXqq63m873spDVLu3JK9Xaw8VqH+4lD67TATgFN7NJ7cK9tDitUHmlNkX5uTv9zJpFVrs2ZRbrQEGZhsQFGF0OABeVGOaltYeL9MmWLLUM9pTvCTIWNruUXmTV6vQiNfR1Uz0fImkAUJVMdvvpH/3PyclRYGCgsrOzFRDAwR8A13egoEx/HChQak6JymrgACh1vC26KMxLF4V5G10KgJPYll2iZQcLtCevTDWwDV1woZ4WtQ71VMc63jzQADghq82u3ysCvnv+EfBtGeGnYe0aKLFBgA4WWvXHgQKlONExmVlSpJ+7LqnrrZgAD6PLAeBEduWVasmBAu3MK5WzDexrUnnv6kTvAmqFlJwSLUkr1O680hp/Pmk2SY383NUpwkdRfu5GlwPACZTa7Fq4L1+bskqUV1o1M8SUlpSprKBY47tFyt3ZE3sADGe327X8YKHWZJQ/KO7sTJIa+rnp4jo+ahzI+SKAc7c3v1SLDhRoZ26prCc5GfV1M6lZkKe61/eRJ8dVAHBaZ5PLJegLoFaz2+0nPQh1VWaTZCb0BrgMo/vQX7uzNel/2yVJl0QH6/4eMVW6/TKbTZ8v36MpyTtVbD168+XKlnU17tI4Bfqc/Y1a+hzgvKw2uxZuKw/47s3+Z8DXXze0b6DW9f2PC+gb3QuPZTGJBwgAnFJ19iy73a43FqZqSWqmJKmOv4ee7xsvf6/KI6LQu4DayZmOoU5myY4M/fvnbSqtOB9sFuGr565urkDvMzsXpL8BOBWr3a7T3+U8uYISqx79bqPSckskSde2idCN7RtUUXUAagOb3X5BHwR9Z9EurdyTLUm6N6mRWtc/+6wGx1MAqtqx56K3zliljPxShfq667/DE2Wh3wDAWTmbXC7jpAOo1Uwmk2r4DIcAnJzRfej37YcdN0R6NA6R2/nOd/gPbmaLRnZupF7x4Xrux81auTNLkvTj2gNavD1DD13eRJe1qMOFRsDFWW12zd+ari/+3qd92cWVPkuo569h7U4c8D3C6F4IAGejenuWSfd0i1ZabrG2pxcoLadEry3YoX9d0bTKj9sAuB5XOIZKahyml65x1xPfbVResVUb9+fpvlnr9NK1LRQR4GV0eQBcnMVkKh+q8hwFeLnpgZ6xeuL7TbLapdmrDyihnr/aNGDQIwBnxmwy6UKemnWNCdKK3eVB3yWpWWrbMPDC/RgAnKFjz0Xt9qN/hHwB4MJinHQAAIBaKr+4TH/uypJUfmPjogt4kTAqxEeTb0zU+L7N5OdZ/qxZZkGpnpy9QQ/MXKsD/xj5E4
BrsNrsmrv5kG7/Yo1eW5BSKeSbUN9fL1wdr4n9m6tNgwAC/QBwjjzdzHq0d5yCvMuPodbtz9OUJbsMrgoAzlybhoF6c0iCQn3LR/HdlVmosV+sVUp6gcGVAYDUJNxXN1SM4muX9MbCFGUVlhpbFABUaF3PX/6eFknSX3tyVFBiNbgiAAAAGIWgLwAAQC21ODVLpRVz6yTFVv1ovv9kMpk08KL6+vKOjuoZH+5Ynrw1Q9e/t1yzVuyV7XzmWwRQbcqsNv266ZDGfLFGry9I0f6cowHfNvUDNOnqeE28uvk5TScIADhemK+HHu0d5zhem7MpXb9sPGRwVQBw5uLCfPWf61urYVD5KL6H8kp0z8y1Wrcvx+DKAEDqn1BXiRWj+GYVlumt31O5RgXAKbhZzLqkUZAkqdRm158Vo/sCAACg9iHoCwAAUEst3JrheN29cUi1/W6Yv6deGtRKLw5qpVA/D0lSfolVk37Zotum/a3U9PxqqwXA2Smz2jRn40GN+WKt3liYogPHBHwTGwToxf7N9fzV8WpFwBcAqlyzOn66s2sjx/sPl+zS2n25BlYEAGenXqCX3r4+Qc3q+EqScovL9MBX67Vkx2GDKwNQ25lNJo3rHu2YQeHvPTn6bl2awVUBQLkuMcGO13+kZBpYCQAAAIxE0BcAAKAWSsst1oa0PElSwyAvxYX5VHsNveLD9eUdHTUwsZ5j2eo92Rr2wZ/6MDlVpVZbtdcE4MRKrTb9svGgbvt8jd78PVVpuUcDvhc1DNBLA5rruX7xalnP38AqAaDm69kkVAMS6kqSbHbppd+2V3roAgCcXbCPh14f3ErtogIlScVlNj353Ub9sv6gwZUBqO2CvN01rnuM4/1nf+7V1kM8jA7AeDEh3qoX4ClJ2nwwX4fySgyuCAAAAEYg6AsAAFALLdx2dMSkHo1DZTKZDKnD38td4/vFa/KNiYoM9pYklVrtmrwwRSOmrNC6vUzjChip1GrTzxvKA75v/Z6qg8fcSGjbMFAvD2iuZ/vGq0UEAV8AqC43tm+gtg3LR07PK7bqhbnbVFBiNbgqADhzPh5umjighXo1DZMkWe3SC79u1YwVewyuDEBtl9ggQNe2jpBU3ptenb+D4ywAhjOZTOp6zKi+i1MZ1RcAAKA2IugLAABQy9jtdi3cliFJMknqFhdibEGS2kcHa8aYDrq5c5QsFaHjbQfzdcvUlXrl160q5KYKUK1KrTb9VBHwfTs5tdJIIe0jA/XKwBb6d99mak7AFwCqncVs0v09Y9UwyEuStDurSK8vSJHVZje4MgA4cx5uZj11VVNdc8wML5OTd+rd31Nks9PPABhnaLv6ahruK0lKyy3Re4t2yk5fAmCwTtFBjtd/pGTSlwAAAGohgr4AAAC1zJaD+dpfMcVzq3r+CvPzMLiicl7uFt3TK07TRrVTswg/SZJd0ozle3T9e8u1dPvhU28AwHkrtdr0w/o0jZ6xRv/5Z8A3KlCvXtNCz1zVTPF1/QysEgDg62HR433i5OdhkSSt2J2t6Sv3GlwVAJwds8mke3vE6NbOUY5ln6/cp4lztqrMajOwMgC1mZvZpPt7xsjHvfwWavKOTP22NcPgqgDUdmG+Hmpe9+hDCNszCgyuCAAAANWNoC8AAEAts2Db0cBsjyahBlZyYvH1/DVtVDvd0ytWnm7lh6v7sos0dsZqPf3tRmUVlBpcIVDzlJTZ9P26NN06Y7Xe/WOn0vOPBnw7RgXptWta6Jkrm6lZHQK+AOAs6gV46cFesTKXT4ag2WvSHLM2AICrMJlMuuniSD14aZyjn83ZeEjjv9+kolJmdgFgjLr+nrorKdrx/sMlu7Unq9C4ggBAUteYYMfrRSlZxhUCAAAAQxD0BQAAqEVKrTYt2lEe9PWwmHRJoyBjCzoJN7NZN3dupBljOqjdMTX+uPaABk9epjnr05ieDKgCJWU2fbfugEbPWK3Ji3YqI/9okP7iRkF6/dqWevrKpmpKwB
cAnFKbBgG65eJIx/t3/tiprYfyDawIAM5N/9YReqZvvDws5WnfJSmZeuCr9cop4kFPAMboHBOsPs3CJEnFZTa9Mj9FJWWMNg7AOO0jAx3HSkt3ZqmUGRAAAABqFYK+AAAAtcjK3dnKKykfFemS6GB5V0z37KyiQnw0+cZEje/bTH6ebpKkzIJSPTl7gx6YuVYHsosMrhBwTcVlNn279oBunbFa7y3apYxjRsq+JDpIb1zbUv+6oqmahPsaWCUA4Exc1SLcEUIptdo1cd52ZRwzMjsAuIpuTUL14jUt5Vtxnrp+f67u+WKtDuYWG1wZgNpq1CWRigzykiTtPFyoacv3GFwRgNrM292idg0DJUn5JVat3pdrcEUAAACoTgR9AQAAapEF2w47XvdoHGpgJWfOZDJp4EX19eUdHdUzPtyxPHlrhq5/b7lmrdgrG6P7AmekqNSq2WvKA77vL96lw8cEfDtFB+vN61rqqcubqjEBXwBwGSaTSaM7RapFRPno65kFpZo4b7uKGXEOgAu6KDJQbwxupRAfd0lS6uFC3f3FGqVmFBhcGYDayNPNrAd6xjpG0Px54yEtS80ytigAtVqXmGDH60UpmQZWAgAAgOpG0BcAAKCWyCkq01+7syVJwT7uSqjvb3BFZyfM31MvDWqlFwe1Uqifh6TykQsm/bJFt037W6npTFMNnEx5wHe/bp2xRh8u2aXMYwK+XWKC9dagVhp/eRPFhRHwBQBX5G4x6+FesQqvOEbanl6gd5JTZedhKAAuqEkdP719fYIaBJaPonkwt0T3zFyrDfsZtQ5A9WsU4q1bLo50vP9PcqrS85g9AYAxWkb4Kci7fOa7VftylVtcZnBFAAAAqC4EfQEAAGqJRTsOq8xWHvboFhcii9lkcEXnpld8uL68o6MGJtZzLFu9J1vDPvhTHyanqtTK6HXAEUWlVn21er9unbFaHy7ZrazCYwK+scF6e1ArPXFZE8WG+hhYJQCgKgR6u+vxPnHyciu/3Je8I1Oz16QZXBUAnJsGQd56+/oEx0wTOUVluv/LdVqWysh1AKrfZfFhuiQ6SJKUV2LVawtSZLXxQBWA6mcxm9SpUZAkyWqza9nOLEPrAQAAQPUh6AsAAFBLLNiW4Xjdo3GogZWcP38vd43vF6/JNyaqYbC3JKnUatfkhSkaMWWF1u3NMbhCwFiFpVZ9uWq/Rk1frf8u3a2swvLRPUySkmJD9J/BrfREnyaKIeALADVKdIiPxnWPdrz/bMVe/bkry7B6AOB8hPh66I3BrZTYMECSVFRm0+PfbtTcjQcNrgxAbWMymXRX10YK8y2fPWFjWp5mrdpvcFUAaqsuMcGO14tSsowrBAAAANWKoC8AAEAtsDerSFsPFUiSYkK81SjE2+CKqkb76GB9PqaDbu4cJYupfITibQfzNeqjlXp17lYVllgNrhCoXoWlVs1atU+jpq/W1GW7lV10NODbLa484PtYn8aKDiHgCwA11SXRwRrWtr4kyS7ptQUp2nm40NiiAOAc+Xm66cVrWqpbxcOqVptdz/6yVbP+2mdwZQBqGz9PNz3QM0ZHJsia9fd+rdufa2xRAGqlqGBvRQV5SZK2ZxRof06xwRUBAACgOhD0BQAAqAUWHjOab3cXH833n7zcLbqnV5ymjWqnZhF+kiSbXZq+bI+uf2+5lm4/bHCFwIVXUGLVzL/LA74fLdujnGMCvt0bh+idIQl6tHdjNSLgCwC1wqDECMcoT0WlNk2ct83x/xsAwNV4upk1oW8z9U+o61j29sIUvf9Hqux2u4GVAaht4uv6aegxD1S9viCFYywAhqg8qm+mgZUAAACguhD0BQAAqOFsdrsWbCsPu5pNUlJciMEVXRjx9fw1bVQ73dMrVp5u5Ye5+7KLNHbGaj397UZlFZQaXCFQ9QpKrPr8r30aNX2Vpi0/GvA1m6QejUP17pAEPXJpY0UF14xRvAEAZ8ZkMmlst2jFhZU/4JGWW6KXf9uuMh
uBOACuyWI26YFL43TzxZGOZZ/9uVcvzt1GbwNQra5pHaGEev6SpMMFpXr7dx46AFD9OkUHqWKCOy1OzZSNPgQAAFDjEfQFAACo4TYeyFN6fokkKbFBgIJ93A2u6MJxM5t1c+dGmjGmg9o1CnIs/3HtAQ2evExz1qdx8wU1Qn5xmT7/a69umb5Kn/y5R7nFVknlAd9eTcoDvg9fGqdIAr4AUGt5upn1aO84BXm7SZLW7c/TlCW7DK4KAM6dyWTSqM5Ruq9XrCpyLfpp/UH96/tNKi6zGlobgNrDYjbp3u7RCvAqP8ZasTtbP64/aHBVAGqbIG93JUSUP3SQnl+qLYfyDa4IAAAAFxpBXwAAgBpuwbYMx+vujUMNrKT6RIX4aPKNiRrft5n8PMtvvGQWlOrJ2Rv0wMy1OpBdZHCFwLnJKy7T9JV7dcv01frkz73KOybge2nTUE0ekqAHe8WpYRABXwCAFObroUd7x8nNXB6Jm7MpXb9sPGRwVQBwfq5pU0//uqqZo7ct2nFYD329QbkVs1sAwIUW4uuhcd2iHe8//nOvdqQXGFcQgFqpc0yQ4/WilEzjCgEAAEC1IOgLAABQgxWX2bS44iKft7tZHY8Z5bamM5lMGnhRfc26o6N6xoc7lidvzdD17y3XrBV7mdIMLiOvuEyfrdijUdNX67MVe5VfcjTg27tpmN67vrUe6BmnBgR8AQD/0KyOn+7s2sjx/sMlu7R2X66BFQHA+evVLEwvXtNC3u7ltzjW7M3RuFlrlZ5XbHBlAGqLtpGBurpVHUlSmc2uV+bvUGEJo4sDqD7tGgbKy638WGj5rmyVlNkMrggAAAAXEkFfAACAGuzPnVkqLC2/wNc5JliebrXv8C/c31MvDWqlFwe1UqifhyQpv8SqSb9s0W3T/lZqOtOawXnlFpfp0z/LA77TV+6rFPDt0yxM71/fWvf3jFX9QC+DKwUAOLOeTUI1IKGuJMlml176bbsO5BCGA+Da2kUF6Y3BCQrydpck7Ugv0N1frNXuzEKDKwNQW9zYvoHiwnwkSftzivXBkl0GVwSgNvF0M6tDVKAkqbDUpr/25hhcEQAAAC6k2pf0AAAAqEUWbMtwvO7RONTASozXKz5cX97RUQMT6zmWrd6TrWEf/KkPk1NVamXEAziP3KIyfVIR8J3x19GAr8Vs0mXx4Xp/aGvd1yNW9Qj4AgDO0I3tG6htwwBJUl6xVS/M3aYCRp0D4OKa1fXTf65PUESApyTpQE6xxn6xVpvT8gyuDEBt4G4x64GeMfKqGF18wbbDWrA14zTfAoCq0zUm2PF6UcXMfgAAAKiZCPoCAADUUJkFpVpV8RR/mK+Hmkf4GVyR8fy93DW+X7wm35iohsHekqRSq12TF6ZoxJQVWseoBzBYTlGppi3frVumr9Lnf+1zBLAsZpMujw/X+9e31r3dY1QvgIAvAODsWMwm3d8zVg0qHhLZnVWk1xekyGqzG1wZAJyfhsHe+s/1CYqtGFUzq7BU985aqxU7s4wtDECtUC/AS3d0aeR4//7iXdqXXWRgRQBqk2Z1fBXqUz67wdr9ucouLDW4IgAAAFwoBH0BAABqqD92HNaR3EaPxiEym0zGFuRE2kcH6/MxHXRTpyhZKv5dth3M16iPVurVuVtVyOh2qGbZhaWatmy3Rk1frZl/71dhafkI025mk65sHq4Ph7bWuO4xjpHKAAA4F74eFj3eJ05+HhZJ0ord2Zqxcp/BVQHA+Qvz89SbgxPUukH5yOWFpTY9+s0G/bb5kMGVAagNusWFqGeT8pm0ispsenV+CjNHAagWZpNJnStG9bXZpSU86AQAAFBjEfQFAACooY6dKrB741ADK3FOXu4Wjbs0TtNGtVOzitGObXZp+rI9uv695Vq6/bDBFaI2yC4s1dQjAd9VlQO+V7Woow+GttbYbjGq40/AFwBQNeoHeunBXrEyVzwD9vWaA1q4jSmmAbg+fy83vXxtC3WNC5Ekldns+r
+ftujrVfsNrgxAbTC6U6QaBJafu+/IKNCnf+41uCIAtUWX6CDH60UpmcYVAgAAgAuKoC8AAEANlHq4QCmHCyVJTcJ91CDIy+CKnFd8PX9NG9VO9/SKladb+eHxvuwijZ2xWk9/u1FZBUx3hqqXVViq/y7dpVHTV+vLVftVVHY04Nu3RR19OKy17k6KJuALALgg2jQI0C0XRzrev/PHTm09lG9gRQBQNTzdLHqmX7yuallHkmSX9Mb8HZqyeKfsdruxxQGo0bzdLXqgZ6zcKp6m+n79Qa3YlW1wVQBqg/qBXooN8ZYk7cws0u6sQoMrAgAAwIVA0BcAAKAGWrjt6Gi0PRjN97TczGbd3LmRZozpoHaNghzLf1x7QIMnL9Oc9WncFEaVyCos1ZQl5QHfr1YfqBTw7deyjqYMa6O7kqIV7kfAFwBwYV3VIlx9moVJkkqtdk2ct10Z+SUGVwUA58/NbNIjfRrrhg4NHcs+XrZHr/xvu6w2zusAXDgxoT66uePR3vN2cirHVwCqRZeYYMfrRSlZxhUCAACAC4agLwAAQA1jtdn1+/byoK+b2aQusSEGV+Q6okJ8NPnGRI3v20x+nm6SpMyCUj05e4MemLlWaTlFBlcIV3W4oEQfVgR8v15zQMUVAV93i0lXt6qrKcPb6M6u0Qrz8zC4UgBAbWEymTS6U6RaRPhJKj/mmThvu+P/RwGAKzOZTBrTtZHu6R7jWPb92jRN+HEzfQ7ABXVVi3B1iAqUJOUUlemNhSk8ZADggrukUZAs5QOKa0lqpmz0HQAAgBqHoC8AAEANs3ZfrjILSiVJbRsGKMDLzeCKXIvJZNLAi+pr1h0d1TM+3LE8eWuGhkxerlkr9srG6L44Q4fzS/T+4p26dfpqzT4m4OthMWlAq7qaMqyN7ujSSGG+BHwBANXP3WLWw71iFV7xoMn29AK9k5zKTAYAaoxBbetr/JVNZTGXJ19+35ahR2avV15xmcGVAaipTCaTxiZFK9TXXZK0bn+evl59wOCqANR0/l5ual0/QJKUWVimDWl5BlcEAACAqkbQFwAAoIZZsC3D8bpHk1ADK3Ft4f6eemlQK704qJVCK8Iv+SVWTfpli26b9rdS0/MNrhDO7HB+id5ftFO3zlitb9emqcRaHpjysJg0IKE84DumSyOFEvAFABgs0Ntdj/eJk5db+WXC5B2Zmr0mzeCqAKDq9IkP18QBzeXtXt7nVu3J0b2z1ikjv8TgygDUVP5ebrqve4wqnjHQF3/v08YDhO4AXFhdY4Idr/9IyTSwEgAAAFwIBH0BAABqkMISq5btzJIk+XlY1C4y0NiCaoBe8eH68o6OGphYz7Fs9Z5sDfvgT32YnKpSK9O+4qj0/BJNXrRTo2as1rfrjgZ8Pd3MuqZ1hKYMb6MxnRsphIAvAMCJRIf4aFz3aMf7z1bs1Z+7sgyrBwCqWsfoYL02qJUCK2a82XYoX2O/WKu9WYUGVwagpmpZz1+DK64l2ezSawt2KJfRxAFcQIkN/OXjbpEkrdidraJSq8EVAQAAoCoR9AUAAKhBlu7MUnFZefC0S2yw3C0c7lUFfy93je8Xr8k3JqphsLckqdRq1+SFKRoxZYXW7c0xuEIYLT2vRO/+karRM1br+3VpKj0m4Htt6whNGdZGoztFKcSHgC8AwDldEh2sYW3rS5Lskl5bkKKdhwnAAag5mkf4663rE1TX31OStC+7SGO/WKstBxllE8CFMSixnlpE+EmS0vNL9U7yTtntdoOrAlBTuVvMurhR+cAfJVa7VuzmmjUAAEBNQvIDAACgBlmwNcPxukfjUAMrqZnaRwfr8zEddFOnKFlM5fMvbjuYr1EfrdSrc7eqsIRREmqbQ3nFeic5VbfOWK0f1h+sFPC9rk2E/ju8jW7tFKVgH3eDKwUA4PQGJUaoS8V0r0WlNk2ct005RYw8B6DmaBTio7evT1BMqI8k6XBBqe6dtU5/7842uDIANZHFbNJ9PWLk51
k+wuaynVn6ZeMhg6sCUJN1rTifk6Q/UjINrAQAAABVjaAvAABADZGeV6J1+3MlSfUCPNW0jq/BFdVMXu4Wjbs0TtNGtVOzilFZbHZp+rI9uv695Vq6/bDBFaI6HMwt1n+SUzV6xhr9uOGgymzlAV8vN7MGJdbT1OFtNOqSKAV5E/AFALgOk8mksd2iFVsRgEvLLdHLv213/P85AKgJ6vh76s0hrdSqnr8kqaDEqodnr9fCrekGVwagJgrz9dDYpGjH+4+W71Hq4QLjCgJQozUO81Edv/IZxTam5elwQYnBFQEAAKCqEPQFAACoIX7fflhHIhjdG4fKVDHiLC6M+Hr+mjaqne7pFStPt/LD6n3ZRRo7Y7We/najsgpKDa4QF8LB3GK9/XuKbvt8jX46JuDr7W7WkMR6+u/wNrrl4kgFEvAFALgoTzezHusdpyBvN0nSuv15mrJkt8FVAUDVCvBy1yvXtVSnilHvSq12Pf3DZn235oDBlQGoiTo2CtJVLcIllfebV+enqKiUWaEAVD2TyeQY1dcuaXFqlqH1AAAAoOoQ9AUAAKgB7Ha7FmzLcLzv3jjEwGpqDzezWTd3bqQZYzqoXaMgx/If1x7Q4MnLNGd9mux2RsCrCdJyi/XmwvKA788bD1UO+F5UHvC9mYAvAKCGCPPz0KO94+RmLn9wbM6mQ0wzDaDG8XK36Nmr43VFizqSysMwr/xvuz5auovzOABV7qYODRUT4i1J2pNVpClLeZAKwIXROSbI8XpRSibHNQAAADUEQV8AAIAaYHt6gfZkFUmSWtT1U11/T4Mrql2iQnw0+cZEje/bTH6e5aPfZRaU6snZG/TAzLVKyykyuEKcq/05RXqjIuA7Z1PlgO/QtvU1dXiibu4YqQAvAr4AgJqlWR0/3dm1keP9h0t2ae2+XAMrAoCq52Yx67HLGmtouwaOZVOX7NYb83fIaiMUA6DqeLiZ9UDPWHlVzAr1vy0Z+mPHYYOrAlAT1fHzVNNwH0nS3uxipWYWGlwRAAAAqgJBXwAAgBpg4bajNwa6Nwk1sJLay2QyaeBF9TXrjo7q2SzMsTx5a4aGTF6uWSv2ysboCS5jf06RXl+wQ2M+X6NfNx1y3OT38bBoWNv6mnpDokZ0aCh/LzeDKwUA4MLp2SRUAxLqSpJsduml37brQE6xwVUBQNUymUy6s1u07kyKdiybvfqA/v3zZpWU2YwrDECN0yDIS6M7RTreT/5jJ8dWAC6ILjHBjteLUrKMKwQAAABVhqAvAACAiyuz2ZVcMQKIu8WkztFBxhZUy4X7e+qlwQl6cVArhfp5SJLyS6ya9MsW3Tbtb6Wm5xtcIU5lX3aRXptfHvCduzldRwbx8vWwaHi7+vrv8Da6sUND+XsS8AUA1A43tm+gtg0DJEl5xVa9MHebCkqsBlcFAFVvaPsGevzyJrKYyt/P35Khx77ZoIKSMmMLA1Cj9GwSqm5xIZKkglKbXluwwzF7EABUlY5RQXI3lx/ULEnNpM8AAADUAAR9AQAAXNzfe7KVU1R+47FDVJB8CSA6hV7x4fryjo4amFjPsWz1nmwN++BPfZicqlIrI0M5k73ZRXp1/nbd/sUazdtSOeB7Q/sG+u/wNrqhPQFfAEDtYzGbdH/PWDUI9JIk7c4q0usLUpjSHkCNdEWLOnquf3N5upXfOlm5O1v3zVqnzIISgysDUFOYTCbd3jlKEf6ekqSthwo0fcVeg6sCUNP4elh0UcUDm7nFVq3dn2twRQAAADhfBH0BAABc3MJthx2vezQONbAS/JO/l7vG94vX5BsT1TDYW5JUarVr8sIUjZiyQuv25hhcIfZmFeqV37brji/W6H9bMhwBXz9Pi25s30BTh7fR8HYN5EfAFwBQi/l6WPR4nzj5eVgkSSt2Z2vGyn0GVwUAF0an2BC9el1Lx0N+mw/ma+wXa7U/u8jgygDUFN4eFj3QM0ZuFaNtfrM2TX/vyTa4KgA1TZeYYMfrRSmZBl
YCAACAqkDQFwAAwIXlFZfpz11ZkqQALzclVjylD+fSPjpYn4/poJs6RcliKr+Js+1gvkZ9tFKvzt2qQqa/rna7Mwv10v+2646Za/Xb1qMBX39Pi0Z0aKipwxM1rF0DRsgGAKBC/UAvPdgrVhV5FH295oAWbsswtigAuEBa1Q/QW0MSFO7nIUnak1Wku79Yo+2H8g2uDEBN0TjcVze2b+B4/+bCVGUWlBpYEYCaJqGev/w9yx/W/HtPjvK5Bg0AAODSCPoCAAC4sMUpmSq1licUk+JCHCOBwPl4uVs07tI4TRvVTk3r+kmSbHZp+rI9uv695Vq6/fBptoCqsCuzUC/+b5vunLlWC7YdDfgGeLnp5o4N9d/hiRratr58KkYsBAAAR7VpEKBbLo50vH/nj53aSugNQA0VE+aj/1zfWlEh5bOzZOSXatystVrNqJsAqki/VnXUtuKh/eyiMr25MEU2u93gqgDUFG5mkzo1CpIkldrsWl4xYAgAAABcE0FfAAAAF7Zw29FwaI/GoQZWgjMVX89fH49qp3t6xcrTrfxwfF92kcbOWK0J321UFqO3XBC7Mgs1ad423TVzrRZuO6wjt82OBHynDGujIRcR8AUA4HSuahGuPs3CJEmlVrsmztuujPwSg6sCgAujboCn3hqSoBYR5Q9r5hVb9dDXG7RoOyOaAzh/ZpNJY7tFK9jHXZK0el+uvlmTZnBVAGqSLrHBjteLU7KMKwQAAADnjaAvAACAizqQU6yNaXmSpMggL8WGehtcEc6Um8Wsmzs30owxHdSuYlQFSfphzQENnrxMc9anyc4ILlUi9XCBJs4tD/j+vr1ywPeWiyP13+EEfAEAOBsmk0mjO0WqRcUMBZkFpZo4b7uKy2wGVwYAF0aQt7teHdRKHSvO3UqsNo3/fpN+XEcYD8D5C/J2133do3Vkjq4ZK/dq88E8Q2sCUHNEB3urfoCnJGnzoXwdzCs2uCIAAACcK4K+AAAALmrhtqMjCHVvHCqTyXSKteGMokJ8NPnGRI3v20x+nm6SysMyT87eoAdmrlVaTpHBFbqu1IwCPT93q+6etU7JO44GfIO83TTqkkhNHd5GgxLrydudgC8AAGfL3WLWw5fGKtzPQ5K0Pb1A7/yxkweVANRY3u4WPT+guXrHl49obrNLL87dpk+X76H3AThvCfUDdF2bCEmS1S69Nj9F+cVlBlcFoCYwmUzqEsOovgAAADUBQV8AAAAXZLfbtXDbYUmSSVK3xiHGFoRzZjKZNPCi+pp1R0f1rJgGW5KSt2ZoyOTlmrVir2zcOD5jKRkFev7Xrbr7y3VatCPTsTzI2023XhKpKcPa6Lo29eRFwBcAgPMS6O2ux/vEycut/PJi8vbDms1U0wBqMHeLWU9e0VSDL6rnWPbBop16e2EK52wAztv1beurWR1fSdLBvBK9u2gXDxIAqBKdo4Mco4YvSsmktwAAALgogr4AAAAuaPPBfB3ILZ9mK6G+v8J8PQyuCOcr3N9TLw1O0IuDWim0YnS8/BKrJv2yRWM+/lup6fkGV+jctqfn69k5WzX2y3ValHI04Bvs467RnaI0ZVgbXUvAFwCAKhUd4qNx3aMd7z9bsVd/7soyrB4AuNDMJpPu7h6jMV0aOZZ9+fd+PffLFpVabQZWBsDVWcwm3d8jRr4e5dctFqdkat6WjNN8CwBOL9TXQ83r+kmS0vJKtC29wOCKAAAAcC4I+gIAALigBduOXujv3jjUwEpQ1XrFh2vW7R01MPHoKFGrdmdr2Ad/6sPkVG4e/8P29Hz9e84WjftqvZakHg34hvi4a0zn8oDvNa0jCPgCAHCBXBIdrGFt60uS7JJeW5CinYcLjS0KAC4gk8mkGzo21CN9GstcMTzevE3peuLbjSoosRpbHACXVsffU3d1PfogwZQlu7Q7k+MqAOevS0yQ4/WxgyQAAADAdRD0BQAAcDElZTYt2lF+Mc7TzaxLooOMLQhVLsDbXeP7xWvyjY
lqGOwtSSq12jV5YYpGTFmhdXtzDK7QeFsP5ev/fikP+C5NzXIsD/Fx1+2do/ThsDYakBAhTzdOeQAAuNAGJUaoS0ywJKmo1KaJ87Ypp6jM4KoA4MLq26qu/n11vDws5eccy3dm6YGv1imrsNTgygC4sk4xwbo8PkySVGK165X5O1RcxkPfAM5P+8hAeVjKn1BatiubwSQAAABcEHe9AQAAXMzK3dnKrxgl6JLoIHkzUmmN1T46WJ+P6aCbOkXJYiq/ELvtYL5GfbRSr87dqsJaOFrU1kN5eubnLbrv6/VatjPLsTzU1113dGmkKcPaqD8BXwAAqpXJZNLYbtGKDfWRJKXllujl37arzGY3uDIAuLC6xoXq5WtbyM+z/Lx844E83fPFWqXlFBtcGQBXNvLiSEUFe0mSdmUW6aNluw2uCICr83a3qH1koCQpv8SqVftyDa4IAAAAZ4u73wAAAC5mwbYMx+vujUMNrATVwcvdonGXxmnaqHZqWtdPkmSzS9OX7dH17y3X0u2HDa6wemw+mKenf96s+77eoOW7shzLw3w9dGfXRvpwaBtd3aquPAj4AgBgCE83sx7rHacgbzdJ0rr9eZqyhFAKgJqvTcNAvTk4QaG+7pKkXZmFuvuLNUpJLzC4MgCuytPNrAd7xjpG35yzKV1LUjINrgqAqzsyC4skLaKnAAAAuBzuggMAALiQ7MJS/bU7W5IU4uOuhHr+BleE6hJfz18fj2qnsb1iHaPV7ssu0tgZqzXhu43KKqiZ08NuSsvT0z9t1gOzN2jFrmzH8nA/D93VtZE+HNZa/VoS8AUAwBmE+Xno0d5xcjMfCaUc0i8bDxlcFQBceHHhvvrP9a3VMKh8BM5DeSW6Z+ZarduXY3BlAFxVZLC3br0k0vH+nT926mAuo4UDOHct6/o5HsxcvTdHuUVlBlcEAACAs8HdcAAAABeyKCVT1ooZkLvFhchSEaJA7eBmMWtk50aaMaaD2jUKciz/Yc0BDXlvmX5dnya7vWZMkb3xQK6e+nGzHvxmg1bsrhzwHZsUrQ+GtlbflnXlbuGUBgAAZ9Ksjp/u7NrI8f7DJbu0lmlhAdQC9QK99Pb1CWpWx1eSlFtcpge+Wq8lO2rHLCwAql7vZmHqXDECZ36JVa8tSJHVVjOu+wCofmazSZ2jy3uK1S4tO2bWNAAAADg/7ooDAAC4kAVbMxyvuzcONbASGCkqxEeTb0zU+L7N5OdZPgrD4fxSPTF7gx6YuVZpOUUGV3juNhzI1VM/btJD327UX3uOBnzr+Hnonm7lAd8rW9Qh4AsAgBPr2SRUAxLqSpJsduml37brQA4j0AGo+YJ9PPT64FZqFxUoSSous+nJ7zbqlw0HDa4MgCsymUy6s2sj1fHzkCRtPpivL/7aZ3BVAFxZl5ggx+s/UjKNKwQAAABnjbvjAAAALmJPVpG2pRdIkmJCvdUoxNvgimAkk8mkgRfV16w7OqpnszDH8uStGRoyeblmrdgrmwuN7rt+f66e/GGTHv52o/7ac3R627r+HhrXLVrvD22tK5oT8AUAwFXc2L6B2jYMkCTlFVv1wtxtKiixGlwVAFx4Ph5umjighXo1LT9Ps9qlF+Zs1ecr9hpcGQBX5Oth0f09Y3RkUq+vVh/Q2n05p/4SAJxEZJC3ooK9JEk7Mgq134UHjAAAAKhtuEsOAADgIhZuOzqabw9G80WFcH9PvTQ4QS8OaqXQihFe8kusmvTLFo35+G+lpucbXOGprduXoye+36RHvtuoVXuP3qiKCPDUvd1j9P71rXU5AV8AAFyOxWzS/T1j1SCw/Cby7qwivc500wBqCQ83s566qqmuSaznWPZucqre/T3FpR7IBOAcmtXx0/B2DSRJdkmvL0xVdmGpsUUBcFldY4IdrxelZBlXCAAAAM4Kd8sBAABcgM1udwR9zSYpKS7E4IrgbHrFh2vW7R018Jgbyat2Z2vYB3/qw+RUlVptBlZ3vLX7cvT49xv16P
ebtPqYkWjqBXjqvh4xem9Igi6LD5cbAV8AAFyWr4dFj/eJk5+HRZK0Yne2ZqxkumkAtYPZZNK9PWI0qlOUY9nnK/dp4pytKnOy8zMAzm9g67pqU99fkpRZUKq3fk/lwQEA5+SSRkEyVYwSviglk14CAADgIrhrDgAA4ALW789Ten75SB2JDQIU5O1ucEVwRgHe7hrfL17v3pCohsHekqRSq12TF6ZoxJQVWrfX2Kkd7Xa7Vu/N0WPfbdRj32/Smn25js/qB3jq/h4xeu/61urTjIAvAAA1Rf1ALz3YK9Yx3fTXaw5UmqkCAGoyk8mkmy+J1IOXxjn64JyNhzT++00qKrUaWxwAl2I2mTSue4wCvdwkSX/tydEP6w4aXBUAVxTk7a6EiPIHBzIKSrX5oHPPCAcAAIBy3D0HAABwAceGIXo0CTWwEriCDjHB+nxMB93UKUqWiuEZth3M16iPVurVuVtVWFK9N5SPBHwf/W6Tnvhhk9buPxrwbRDopQd7xmry9a3Vu1m4LEfufgMAgBqjTYMA3XJxpOP9O3/s1NZD3EwGUHv0bx2hZ/rGy91Sfr6zJCVTD3y1XjlFpQZXBsCVBPu4697uMY73n67Yq+3pHFMBOHtdY4IdrxelZBpYCQAAAM4UQV8AAAAnV1xm0+LU8ottPu5mdYgKMrYguAQvd4vGXRqnaaPaqWldP0mSzS5NX7ZH17+3XEu3H77gNdjtdv29J1uPfrdRT/ywSesPHA34Ngzy0kO9YvXukAT1ahpGwBcAgBruqhbh6tMsTFL5jAMT521XRn6JwVUBQPXp1iRUL17TQj4eFknS+v25uueLtTqYW2xwZQBcSWLDAA1MqCtJKrPZ9cpvKSqo5ge6Abi+tg0D5O1eHhVZvitbxWU2gysCAADA6RD0BQAAcHLLdmapqLT8QlunmGB5unEIhzMXX89fH49qp7G9Yh3/t7Mvu0hjZ6zWhO82Kqug6keQstvt+mt3th7+dqPG/7hZ6w/kOT5rGOSlh3vF6p3BCerZhIAvAAC1hclk0uhOkWpR8QBSZkGpJs3bzg1lALVK28ggvTG4lUJ83CVJqYcLdfcXa7TzcIHBlQFwJcPa1VeTcB9J0oHcYr2/eJfsdrvBVQFwJR5uZnWIDJQkFZXZ9NeebIMrAgAAwOmQEgEAAHByC7ZmOF73aBxqYCVwVW4Ws0Z2bqQZYzqoXaMgx/If1hzQkPeW6df1aVVyQ8hut2vl7iw99M0GPfXTZm1MOxrwjQr21iOXxumdwQnqQcAXAIBayd1i1sOXxircz0OStC29QO/8sZNgCoBapWkdP719fYLqB3pJkg7mluieL9Zqw/7c03wTAMq5W8y6v0esfCpG4/x9+2Et2HbhZ24CULN0jQl2vF6UkmVcIQAAADgjBH0BAACc2OGCUq3ZlyNJquPnoeYRfgZXBFcWFeKjyTcmanzfZvLzdJMkHc4v1ROzN+iBmWuVllN0Ttu12+1asStLD36zQf/6aYs2Hcx3fNYo2FuP9Y7Tfwa3UvfGoQR8AQCo5QK93fV4nzh5Vcw0kLz9sGavSTO4KgCoXg2CvPX29QlqHO4rScouKtP9X67T8tRMgysD4CoiAjx1R9dGjvfvL96lvVnndl0HQO3UtI6vwnzLZxlYeyBXWYVVP/MbAAAAqg5BXwAAACeWvP2wbBUDnHVrHCKziZAkzo/JZNLAi+pr1h0d1bNZmGN58tYMDZm8XLNW7JXtDEfVs9vtWr4zSw98s0FP/7xFm48N+IZ467HejfX24FZKigvl/3YBAIBDdIiPxnWPdrz/bMVe/bkry7B6AMAIob4eemNwKyU2DJBUPm32Y99u1NxNhwyuDICr6Bobokubls/+VVxm06vzd6jUajO4KgCuwmwyqXN0+ai+dru0JDXL2IIAAABwSgR9AQAAnNiCbRmO1z0ahxpYCWqacH9PvTQ4QS8OaqXQiumz80usmvTLFo35+G+lpuef9LvlAd9M3T97g5
75ZYu2HBPwjQnx1hN9GuvtQa2UFEc4HQAAnNgl0cEa1ra+JMku6bUFKdp5uNDYogCgmvl5uunFa1qqW8X5vtVm17M/b9GXf+0zuDIAruLWSyLVINBLkpRyuFAfL99rcEUAXEmXmCDH60UpzCwAAADgzAj6AgAAOKmUjAJH2KFpuK/qV1y0B6pSr/hwzbq9owYm1nMsW7U7W8M++FMfJqdWGgnGbrdraWqm7vt6vZ75Zau2Hjom4Bvqoycva6w3B7VSl1gCvgAA4PQGJUaoS0z5CFJFpTZNnLdNOUVlBlcFANXL082sCX2bqX9CXceytxam6P0/dsp+hrOtAKi9vNwterBXjNwt5ddhftxwUMt3ZhlbFACXUS/AS3GhPpKkXVlF2pXJw5cAAADOiqAvAACAk1q47bDjdffGIQZWgpouwNtd4/vF690bEtUw2FuSVGq1a/LCFI2YskLr9mZrSUqm7v16vf49Z6u2pRc4vhsb6qPxlzXRm9e1VOcYAr4AAODMmUwmje0WrdiKG8tpuSV6+bftKrMRbANQu1jMJj1waZxuvjjSseyzP/fopXnb6IkATis6xEcjOzZ0vH87OVXp+SUGVgTAlVQa1TeVUX0BAACcFUFfAAAAJ2S12fX79gxJkpvZpK6xBH1x4XWICdbnYzropk5RslQEdndnFenBbzbo2V+3avsxAd+4MB89dXl5wLdTTDABXwAAcE483cx6rHecgrzdJEnr9udpypLdBlcFANXPZDJpVOco3dczVkfOrn5cd1BP/7BJxWVWQ2sD4PyuaB6uixsFSZLyiq16fUGKrDwoAOAMXBwVJIu5/OhjSWqWbPQOAAAAp0TQFwAAwAmt2ZejrMLyaYvbRQbK38vN4IpQW3i5WzS2V6wevKKJ6oX7KijQS25uFsfn9fw99fQVTfTGtS11SXSwTAR8AQDAeQrz89CjvePkVnFzec6mQ/pl4yGDqwIAY1yTWE//uqqZoyf+sf2wHvp6g3KLygyuDIAzM5lMuiupkcJ83SVJGw7k6ctV+w2uCoAr8PdyU2J9f0lSVmGZ1qflGVwRAAAAToSgLwAAgBNasO2w43X3xozmi+phs9uVvP2w7vlynaat2CvbMZ+VllqVmV2kVTsO66fVB5RdyE1mAABQdZrV8dOdXRs53n+4ZJfW7ss1sCIAME6vZmGaNLCFvN3Lb+Gs2ZujcbPWKj2v2ODKADgzf0833d8jVhXPCWjWqv1av5/jKQCn1zkm2PF6UUqmgZUAAADgZAj6AgAAOJnCEquWpZZfTPPztKhdZKDBFaGmKw/4ZmjsrHWaOG+bUg8XOj5rVsdXd3dtpOgAT5WUlE8X+8OaAxry3jL9uj5NdjtTuQEAgKrRs0moBiTUlSTZ7NJLv23XgRxCbQBqp/aNgvT6oFYK8i4fnXNHeoHu/mKtdmcWnuabAGqz5hF+uv6i+pLKj6deW5DCiOAATiuxvr98PcpndVuxO1uFpVaDKwIAAMA/EfQFAABwMktSM1ViLQ9Pdo0NkbuFQzZcGFabXQu3ZejuWes0cd527TzmhnF8HV/931VN9crAFrqqZV1NHnGRxvdtJj9PN0nS4fxSPTF7gx6YuVZpOUVG7QIAAKhhbmzfQG0bBkiS8oqtemHuNhWUcJMZQO0UH+Gvt69PUESApyTpQE6xxn6xVpuZUhvAKVzbJkKt6vlJkg4XlOrt5FQe1AZwSu4Wsy5uFCRJKrHatWJ3trEFAQAA4DikRgAAAJzMgm2HHa97NA41sBLUVFabXQu2puvuWWv14v+2a9cxAd/mdf3076ua6eWBLdQuMkgmU/l8j2aTSQMvqq9Zd3RUz2ZhjvWTt2ZoyOTl+nLlXtm4aQQAAM6TxWzS/T1j1SDQS5K0O6tIry9MkdXGcQaA2iky2Fv/uT5BsWE+kqSswlLdO2utVuzMMrYwAE7LYjbp3u4x8vcsH53zz13Z+mnDIYOrAuDsusYEOV4vSskyrA4AAACcGEFfAA
AAJ3Iwt1jr9udKkuoFeKpJuI/BFaEmsdrsmr81XXfNWquXftuh3VlHR+JtEeGnZ/s200sDmqttZKAj4PtP4f6eemlwgl4c1Eqhfh6SpPwSqyb+vEVjPv5bqen51bIvAACg5vL1sOjxPnHyOzJ17K5szVi5z+CqAMA4YX6eenNwglo3KB/xvLDUpke/2aDfNqcbXBkAZxXq66F7usU43k9bvkcpGQUGVgTA2cWF+qiuf/n13o1peUrPLzG4IgAAAByLoC8AAIAT+X175dF8Txa2BM6G1WbXb1vSdefMtXr5tx3ac0zAt2WEv57r20wv9m+uixqePOD7T73iwzXr9o4amFjPsWzV7mwN++BPfZicqlKrrcr3AwAA1B71A730YK9YmSsOTb5ec0ALt2UYWxQAGMjfy00vX9tCXeNCJEllNrv+76fNmr1qv8GVAXBW7aMC1a9lHUnlPePV+TtUWGo1uCoAzspkMqlLdLAkyS5pSWqWofUAAACgMoK+AAAATsJut2vhtqNB3+6NQwysBjWB1WbXvM2HdMfMNXpl/g7tzT4a8E2o56/n+8VrUv94JZ5FwPdYAd7uGt8vXu/ekKiGwd6SpFKrXZMXpmjElBVavy+nyvYFAADUPm0aBOiWiyMd79/5Y6e2HmL2AAC1l6ebRc/0i9dVFcE9u6TX5+/Qfxfvkt1uN7Y4AE5pRIcGig0tnzFsb3axPlyy2+CKADizLjHBjteLUjI5vgAAAHAiBH0BAACcxLb0AkcQs0WEn+r4expcEVyV1WbX3M2HdPsXa/TaghTtyy52fJZQ318vXB2vif2bq02DgCoZNbpDTLA+H9NBN3WKkqVie9sO5uuWqSv16tytKixhtBgAAHBurmoRrj7NwiSVP1A0cd52ZTCFLIBazM1s0iN9GuuGDg0dy6Yt261X/7ddVhthHACVuVvMeqBnjLzcy28Jz9+aUWlGMQA4Vrifh5qF+0qS9uUUK+VwocEVAQAA4AiCvgAAAE5iwTFTEfdoHGpgJXBVZVabft10SGO+WKPXF6Rof87RgG+b+gGadHW8Jl7dXK3rB1T5b3u5WzTu0jhNG9VOTev6SZJsdmn6sj26/r3lWspNJAAAcA5MJpNGd4pUi4rji8yCUk2at13FZTaDKwMA45hMJo3p2kj3dI9xLPtubZqe+XEz/RHAceoHemlMpyjH+/cW7dT+nKJTfANAbdYlJsjxelFKpnGFAAAAoBKCvgAAAE6g1GrToh3lF808LCZ1OmaKLOB0yqw2zdl4UGO+WKs3FqbowDEB38QGAXqxf3M9f3W8Wl2AgO8/xdfz18ej2mlsr1h5upWfbuzLLtLYGas14buNyiooveA1AACAmsXdYtbDl8Yq3M9DUvlMGO/8sZNpZAHUeoPa1tf4K5vKYi6fWWXhtgw9Mnu98orLDK4MgLPp0SRUPRqHSJIKS216dX6KSq08GADgeB2jguRecWyxdGeWypgxAAAAwCkQ9AUAAHACf+/JUU5R+Y24DlFB8vWwGFwRXEGp1aZfNh7UbZ+v0Zu/pyot92jA96KGAXppQHM91y9eLev5V2tdbhazRnZupBljOqhdoyDH8h/WHNCQ95bp1/VpBHMAAMBZCfR21+N94uRV8SBR8vbDmr0mzeCqAMB4feLD9cKA5o7+uGpPju6dtU4Z+SUGVwbA2dzWOUr1AjwlSdv/n737Do+qzNs4fk9J7w1IAqQQepUmTWm6rqhYkCKKIhZwBVHUVSy7+uoqdmyAKLCighR1bWsBMSAdIoZeEhISAgmQ3svMvH9MGIJrAYWclO/nuva6zpyQcOuaKefcz+85UaIPth4xOBGAusjb3aILmjsHRhSW27TjSKHBiQAAACBR9AUAAKgT4pOyXceDWocYmAT1QaXNrq92Owu+r69J1bGiUzdwuzcP0ItXt9fTV7RTh2a1W/D9uZbB3pp9Uzc9OqytfD2skqSc4ko98sluTVu6Q1lsEwkAAM5CdLC37hkY7X
r8wdYMbUnLMywPANQVF0YH6ZXrOynA0/m5K+l4sSYv2aGMvFKDkwGoS7zcLJo2OFbW6kmdn+3M0o/p+QanAlAXDaix4+DalFwDkwAAAOAkir4AAAAGKyqv0tY050X1AE+rukX6G5wIdVWlza4vd2Xpjg+3640fUnW8RsG3Z4sAvXRNBz11RVu1N7jgW5PZZNK13SO0bFJvDW4b6jr/w4FsjZqzWcsTMmRnui8AADhDfaKDdEP3CEmSQ9Ir8Sk6lEORDQA6hPvp9dGd1cTPXZJ0JL9Mk5fs0P5jRQYnA1CXtAr11s29Il2PX1uTqpySSgMTAaiLOoX7yb96AdFPGQUqrqgyOBEAAAAo+gIAABhsXUququzOouNFrYJlqZ6qAZxUabPri11Zun3xds1ae+j0gm/LAL18bQc9Oayt2jX1NTDlbwvz89ALIzvr+es7KcTXeeO5uMKmGV/t150Ltyn1RLHBCQEAQH1xfbdm6lc9Yaqs0q4ZK5NUWMaNZwCICvbWm6O7KDrYS5KUU1Kpqct2ahsTOwHUcEXHJurZIkCSVFBWpVfjU2SzswgbwClWs0l9ogIlSZV2hzan8V4CAADAaBR9AQAADBZ/INt1PCguxMAkqGsqquz6fGeWblucqNlrD+lE8amCb++WgXrl2g568vK2atuk7hZ8f25IuzAtm9hb13QLd537KT1fN7y9Re/8kKpKm93AdAAAoD4wmUyacnG0YkO8JUlZhRV6YVWya/EcADRmTfw89ProzuoY7tzppaTCpgc/2aU1Na49AGjcTCaTJl8crWBvN0nSjqOF+mR7psGpANQ1A2ICXcdrU3KNCwIAAABJFH0BAAAMlVlQrn3HnJNMWwZ5KibEy+BEqAvKq+z6bEembl+cqDnrDim7+NQWihdGBWrmdR31z8vbqE09KvjW5O/lpseubKfZN3ZT8yDnf/OVNofmrE7RuHlbtetIgcEJAQBAXedhNevhS1op0Mu5nezOo0WatyHd4FQAUDf4e7rp5REd1ad6+nmlzaF/frlXn1HkA1DN39OqewfF6OS+Yh/+eER7s4oMzQSgbokK8lJkgIck6cDxEmUVlhucCAAAoHGj6AsAAGCg+KTTp/maTKbf+NNo6Mqr7Pq0uuD71vo0ZZecKvj2iQ7Uq9d11D/+2katw3wMTHnu9IoJ0od39tLNfVvKUv3fftKxYt26IEEvrzig0gqbwQkBAEBdFurrrocuaSWr2fk+4pu9x/X1nuMGpwKAusHTzaJ/XdVOl7UPkyTZHdJL3yXr3Y3pcjiYgA5A6hTup5EXOHdcsjukl79PUVF5lcGpANQVJpNJA6oXDUnS+tQ848IAAACAoi8AAIBRHA6HVlcXfU2SLmoVbGwgGKas0qZPtmfqtsWJmrs+TTk1Cr59o4P02oiOevyyNoprIAXfmjzdLLpnaCu9O6GH2jR1Tii2O6RFmw5r9FubtTE5x+CEAACgLmvbxFd3DYhyPX5nQ5p2HCk0MBEA1B1Wi1nTL2utMT0iXefmb0jTq98flJ2yLwBJI7uFq0P19ZgTxRWatfYQiwEAuPSNDnRN/l6XksvzAwAAgIEo+gIAABhkb1axsgorJEldIvwU4uNucCLUtrJKmz5OPKrbFm/XOxvSlFuj4Ns/JkivX99Jj13WWq1CG17B9+fahftp4YQemjwkVh5W58eUI/llmrw4UU98tkd5Nf7dAAAA1DS4dYiu7txUknPB0AurkpVZwLayACA5p/HddXG07roo2nXuk8RMPfXf/aqoshsXDECdYDGbdO+gGPm6WyRJG1Pz9O2+EwanAlBXBHu7q0Mz52KAY0UVOnCixOBEAAAAjRdFXwAAAIPEV0/zlaRBrUMMTILaVlZp00eJR3Xb4kTN25iuvNIaBd/YIL1xfSc98pfWig3xNjBl7bNazBrfL0qL7+ylHlGBrvNfbM/UqLc26dtdWUyNAAAAv+imnpHq3txfklRUbtOzK5JUUmEzOBUA1B1jek
Zq+mWtZakey7dq/wk9/OlulVRUGRsMgOFCfd11d43FAAs2putQTqlxgQDUKQNiglzH61JyDUwCAADQuFH0BQAAMEBFld11UczTataFNUqNaLhKK21a/tNRTViUqPkb05VX6ryhapJ0UWyw3hzZSY9c2loxjazg+3Mtg701+6ZuenRYW/l6WCVJOcWVeuST3Zq2dIeyCsoMTggAAOoai9mk+wbHKjLAU5KUnlemmatTZLOzSAgATvprhyb61/D2rl1UEtLyde/yXeygAkAXRgfq8vZhkqQKm0Mvf39Q5Uz9BiCpR3N/uVevFNp0KF8VNp4bAAAAjEDRFwAAwABb0/NdE8b6RAfK081icCKcTyUVNi3ddkQTFiVqwaZ05ZedKvhe3MpZ8H340jhFBzfugm9NZpNJ13aP0LJJvTW4bajr/A8HsjVqzmYtT8iQnem+AACgBh93i6Zf2sq19fTWtHwtTjhicCoAqFv6xgbr5REd5Ve9qHJfVpHuXrJdR/NZUAk0drf0bq6oYC9JzkVT8zemG5wIQF3g6WZRrxYBkqSSSpt+yigwOBEAAEDjRNEXAADAAPFJ2a7jQa1DDEyC8+lkwfe2xYl6d/NhFdQo+A6MC9asUZ310CVxiqLg+6vC/Dz0wsjOev76TgrxdZckFVfYNOOr/bpz4Talnig2OCEAAKhLIgI8df+QWJmrt6b/eHumVtd47w0AkDpF+Ov1UZ0VVv0Z63Beme5esl3Jx/l8BTRm7laz7h8c65r6vWLfCa2v3pEMQOPWPzbIdbwuJc+4IAAAAI0YRV8AAIBalldaqW3p+ZKkYG83dWzmZ3AinGslFTZ9+OMRTVj002kFX7NJGhQXotmjOuvvQ+PUMsjL4KT1x5B2YVo2sbeu6RbuOvdTer5ueHuL3vkhVZVsGQcAAKp1jfTXrRe2cD2etfaQDlBeA4DTxIR6683RXVyfS7OLK3XPsh1KPJxvcDIARmoe6Knb+57+PiqrsNzARADqgg5NfBXk5dwNYPuRAtf1bgAAANQeir4AAAC1bN3BXNkczuOBccGynBw3hnqvuLxKixMydOuin/TelsMqLLdJchZ8h7R2FnwfHNpKLSj4/iH+Xm567Mp2mn1jNzWv/ndYaXNozuoUjZu3VbuOsG0cAABwGtYhTJe0CZXkfL8wY2WycoorDE4FAHVLU38PvT66s9o385UkFZXb9MDHu7UumUnoQGM2pHWILqqe3llSYdPL36eoyu4wOBUAI5nNJvWLdj4v2BzSxkN5xgYCAABohCj6AgAA1LL4GlsHD4wLMTAJzpWi8iotSsjQrYsS9f7WDBXVKPgObROiOaM66/4hrdQ8kILvudArJkgf3tlLN/dtKYvJWZRPOlasWxck6OUVB1RaYTM4IQAAMJrJZNId/VqoQ1NneS23pFIzViarvIpdAACgpkAvN708opN6RwVKkipsdj32+V59uTPL2GAADGMymTSxf5Sa+rlLkg4cL9aHCUcMTgXAaP1jglzH61NyDUwCAADQOFH0BQAAqEXpuaVKPlEiSWoV6u3aIhP1U1F5lT7YelgTFiXqg60ZKq44VfC9pE2o3hrdRdMGt1IkBd9zztPNonuGttK7E3qoTXWBx+6QFm06rDFzN2vjwRyDEwIAAKO5Wcx6cGiswnydJZWkEyWatfaQHA4m0gFATd7uFj1zdXsNbeuchG53SM+vSNIHmw/znAk0Ut7uFk0bHCtL9UZkH2/P1E8Z7KQENGbNAz0VXX0/42BOqTLyywxOBAAA0LhQ9AUAAKhFq5NOlQ8Htgo2MAn+jMLyKr2/xVnwXZRw5LSC76VtQzV3dBfdNzhWEQGeBidt+NqF+2nhhB6aPCRWHlbnx5uMvDJNXpSoJz7bo7ySSoMTAgAAIwV4uWn6pa3kWf0+4YfkHH2ynSmVAPBzbhazHru8ja6/INx1bu66Q3pzdYrslH2BRql1mI9u7BXpevza6hTllXKdBWjM+sUEuo6Z6gsAAF
C7KPoCAADUErvDodXJ2ZKchdABFH3rncKyKr1XXfBd/OOpgq/FbNJf2oVp7pguundQrMIp+NYqq8Ws8f2itPjOXupRvd2sJH2xPVOj3tqkb3dlMYUKAIBGLDrYW/cMjHY9/mBrhrak5RmWBwDqKrPJpMkDY3Rn/yjXuWXbjuqZrw+o0mY3MBkAowzv1FTdIv0lSXmlVXptdSrlf6AR6xsVKHP1pO91qXk8HwAAANQiir4AAAC1ZNfRQmUXO6dedG8eoEAvN4MT4UwVlFXq3c3punXRT/rwxyMqqVHwvaxdmOaO7qKpA2MU7k/B10gtg701+6ZuenRYW/l6WCVJOcWVeuST3Zq2dIeyCthODgCAxqpPdJDGdI+QJDkkvRKfokM5pcaGAoA6yGQy6cbezfX3S+NcRZ4Ve4/rkU/3qLTSZmw4ALXObDLpnoHRCvRyXmf5KaNAn+1gdwSgsQrwclPncD9JUk5JpfYeKzY4EQAAQONB0RcAAKCWxCfluI4HxjHNtz7IL63Uu5vSNWFRopZuO6rSSucEI6vZpMvbh+mdMV10z8AYNfP3MDgpTjKbTLq2e4SWTeqtwW1DXed/OJCtUXM2a3lCBpMmAABopEZ2a6Z+MUGSpLJKu2asTFJhWZXBqQCgbrqiU1M9dVU7uVuct5E2H8rTtOU7lV9aaXAyALUt0MtNUwfGqLr7rw+2ZujAccp9QGPVv/ozlSStS8k1MAkAAEDjQtEXAACgFpRV2rSh+qKXt7tFPVsGGhsIvym/tFILThZ8fzq94DusQxO9PaaLJl8coyZ+FHzrqjA/D70wsrOev76TQnzdJUnFFTbN+Gq/7ly4TaknuCEFAEBjYzKZNOXiaMWGeEuSsgor9MKqZFXZWQQEAL9kQKsQvXhdB/l6WCRJuzOLNHnpDmUVlBucDEBt6xrpr2u7NJMk2RzSy98fVHEFU76Bxqh7pL+83Jw1ky1p+SqvshucCAAAoHGg6AsAAFALNh3KU1n1Ba/+MUHysPI2rC7KK63U/I1pmrAoUct/Our6/8xqNumKDk30zg1ddPdF0RR865Eh7cK0bGJvXdMt3HXup/R83fD2Fs1bm6pKGxeiAQBoTDysZj18SSvX9tM7jxZp3oZ0g1MBQN3VtXmAXhvZWSE+bpKktJxS3b1ku1KzSwxOBqC2jekRoTZhPpKcC6beWndIDnZNAhodd6tZvasHmZRV2ZVwON/YQAAAAI0EDRMAAIBasDopx3U8MC7YwCT4JbkllZq3wVnw/Sgx87SC75Udm2jeDV31t4uiFeZLwbc+8vdy02NXttPsG7upeZCXJKnS5tDs+BSNm7dVu44UGJwQAADUplBfdz10SStZzc4NqL/Ze1xf7zlucCoAqLtahfnozdFd1DzQU5J0vKhCk5fs0E4+SwGNitVs0rTBMfJ2d075XnswV6sOZBucCoAR+scEuo7XVe9kCAAAgPOLoi8AAMB5llNcoe3VN7+a+rmrXVNfgxPhpJySCr2zIU23LU7Ux9szXduMuVlMuqpTU80b21V3DYhWqK+7wUlxLvSKCdKHd/bSzX1bymJyFnuSjhXr1gUJennFAZWy5SQAAI1G2ya+umtAlOvxOxvStONIoYGJAKBuCw/w1BujO6ttE+c0z8LyKk37aJc2HMz5ne8E0JA08fPQ32q8h3p7fZoO55UamAiAEdqE+Si0etr/zswi5ZVWGpwIAACg4aPoCwAAcJ6tOZgje/UudgPjQmSuLhjCODnFFZq7/pBuW5SoT2oUfN0tJg3v1FTzbuiqSf2jFOpDwbeh8XSz6J6hrfTuhB5qU126tzukRZsOa8zczdrITWoAABqNwa1DNLxTU0nO9wMvrEpWZkG5wakAoO4K8nbXzJGd1KNFgCSpvMquRz/bo693HzM4GYDa1C8mSJe2DZUkVdgceun7FFVUX1sD0DiYTSb1jwmSJDkc0obUPGMDAQAANAIUfQEAAM4jh8Oh+Bpb2F3cKtjANMgprt
DcdYd02+JEfbojSxU2ZwPb3WLS1Z2dBd+J/aMUQsG3wWsX7qeFE3po8pBYeVidH4sy8so0eVGinvhsj/KZQgEAQKMwrlekujf3lyQVldv07IoklTDlHwB+lbe7VTOu6aDBbUIkSTaH9Ow3B/Th1gyDkwGoTRP6tFCLQE9J0qGcUr27+bDBiQDUtv7RQa7jtSm5BiYBAABoHCj6AgAAnEepOaVKyy2TJLVt4qOIAE+DEzVOJ4orNGfdIU1YnKhPd54q+HpYzbq2SzPNG9tVd/aLUjAF30bFajFrfL8oLb6zl3pEBbrOf7E9UyPnbNK3u7LkcDiMCwgAAM47i9mk+wbHKrL6fXp6Xplmrk6Rzc57AAD4Ne5Wsx6/vK2u7drMdW72D6mavSaVz1BAI+FhNWva4Fi5W5w7l32157g2plL0AxqTZv4eahXiLcn5OSott9TgRAAAAA0bRV8AAIDzKD7p1DTfQXEhBiZpnE4UVWj22lTdvjhRn+/MUuXPC743dNXtfVsq2JuCb2PWMthbs2/qpkeHtZWvh1WSlFNcqUc+2a1pS3coq6DM4IQAAOB88nG3aPqlreTrbpEkbU3L1+KEIwanAoC6zWI2aergWN3at4Xr3IcJGZrxbZKqbHYDkwGoLVHBXrq1z6nngFk/HNLxogoDEwGobQNiT031XcdUXwAAgPOKoi8AAMB5YrM79ENyjiTJajapX42LXji/jheVa9YPqbptcaK+2HXstILviK7NNH+ss+Ab5O1mcFLUFWaTSdd2j9CySb01uG2o6/wPB7I1as5mLU/IkJ3JVAAANFgRAZ66f0iszM6hdPp4e6ZW11i0BwD4XyaTSeP7tNT9Q1u5nj+/3n1Mj32+V2WVNmPDAagVf2kbqr7RgZKkogqbXok/yM4IQCNyYcsAWarfBKxPzeP3HwAA4Dyi6AsAAHCeJGYUKK+0SpLUs2WA/KonheL8OVZYrjd/SNXti7fry93HVFV9YdHTatb13cK1YGxXTejTUoFeFHzxy8L8PPTCyM56bkRHhfg6Jz0XV9g046v9unPhNqWeKDY4IQAAOF+6Rvrr1gtrTKVbe0gHjvPaDwC/Z3iXZnryinZysziLPhtScnX/R7tUUFZpcDIA55vJZNLfBkQprPoayt6sYi3ddtTgVABqi6+HVd0i/CRJ+WVV2pVZZHAiAACAhouiLwAAwHkSX2MC2KC4EAOTNHzHCsv1xpoU3fHhdv23RsHXy82skd3CNX9sV916YQsFUPDFGRravomWTeytq7uFu879lJ6vG97eonlrU9mKFgCABmpYhzBd0sY53b/S5tCMlcnKKWYLagD4PRe3DtHz13aQt7tFkrTzaKHuWbpTxwrLDU4G4Hzz8bBq2qAY12Tv5T8d1Y4jhcaGAlBrBsSc2slwbUqugUkAAAAaNoq+AAAA50FJhU2bD+VJkvw8LLqgub+xgRqorMJyvbbaWfD9as/x0wq+oy5wFnzHU/DFH+Tv5abHr2yn2Td2U/MgL0nOws/s+BTdNG+rdh0pMDghAAA410wmk+7o10IdmvpKknJLKjVjZbLKq1jkAwC/p3uLQL06spOCvZ2fwVOySzR5yQ4dyikxOBmA861tU1/d0CNCkuSQ9OrqFBWUVRkbCkCt6BrhJ5/qhT4/Hs5XaaXN4EQAAAANE0VfAACA82BDaq4qbM7S6YDYYLlZeNt1Lh0tKNOr1QXfb/aeXvAd0z1CC8Z20y29W8jfk4Iv/rxeMUH68M5eurlvS1lMzvE0SceKdeuCBL284oBKK7h4DQBAQ+JmMevBobGuLaiTTpRo1tpDcjgcBicDgLqvTRNfvTG6syICPCU5F+hOWbJDezKZ7gk0dNd2aaYuEX6SpJySSr2+JpX3T0AjYLWY1ScqUJJUYXNoS1q+sYEAAAAaKBonAAAA50H8gWzX8aDWIQYmaViOFpRpZvxB3fnhdn2797hs1QVfb3eLq+A7rldz+XlaDU6KhsbTzaJ7hrbSux
N6qE31hD+7Q1q06bDGzN2sjQdzDE4IAADOpQAvN02/tJU8rc7Lpz8k5+iT7VkGpwKA+iEy0EtvjO6suDAfSVJ+WZXuW75Tm1PZzhtoyMwmk+4ZGCP/6utyCen5+mLXMYNTAagNA2KCXMfrUni9BwAAOB8o+gIAAJxjxwrLtSuzSJIUGeChuFBvgxPVf0fyy/TK986C74p9J1Td75WPu0Vje0Ro/tiuFHxRK9qF+2nhhB6aPCRWHtXFn4y8Mk1elKgnPtuj/NJKgxMCAIBzJTrYW/cMjHY9/mBrhrak5RmWBwDqkxAfd706spO6NfeXJJVW2vXwp3u0Yu9xg5MBOJ+Cvd10z8XRrsfvbclQ8okS4wIBqBWxIV5q5ufcEWXPsWKdKK4wOBEAAEDDQ9EXAADgHFuTfGqy58C4EJlMJgPT1G8Z+WV6+ftkTVyyXSv3n17wvbFnpOaP7aobezaXnwcFX9Qeq8Ws8f2itPjOXureMtB1/ovtmRo5Z5NW7D7G1pQAADQQfaKDNKZ7hCTJIemV+BSl5ZYaGwoA6glfD6uev7ajLo5z7nRkszv09Ff7tfzHIwYnA3A+dW8RoOGdmkqSquwOvfz9QZVW2AxOBeB8MplM6l9jqu/61DzjwgAAADRQFH0BAADOIYfDofikbNfji1sFG5im/srIK9VLq5I1acl2fbc/21Xw9fWw6KaekVowtqvG9oiULwVfGKhlsLfmjOumR4e1df23mFNcqekf79K0pTuUVVBmcEIAAHAujOzWTP2qb1qXVdr17IokFZZVGZwKAOoHD6tZT1zRVld1buo69/rqFM1de4gFkkADdmPPCLWq3uXsaEG55m5IMzgRgPOtX/Spou+6lFxe5wEAAM4xir4AAADnUPKJEh3JL5ckdQr3VRM/D4MT1S/puaV64btkTVq6Q6sOnCr4+nlYNK5Xcy0Y20039IiUDwVf1BFmk0nXdo/Qskm9NbhtqOv8DweyNWrOZi1PyJCdi9oAANRrJpNJUy6OVmyIs6ySVVihF1Ylq8rOazwAnAmL2aT7h7bSzRc2d537YMthvbAyiedSoIFys5h1/+BYebk5b0WvTspR/IHs3/kuAPVZmK+72jXxkeQs+KfmsBMKAADAuUTRFwAA4ByKCvbSXQNaqn1TXw2s3poSvy8tt1TPf5eku5buUHzSqYKvv6dVt/Rurvlju2lM9wh5u1uMDQr8ijA/D70wsrOeG9FRIb7ukqTiCptmfLVfdy7cptQTxQYnBAAAf4aH1ayHL2mlQC/ngrOdR4s0b0O6wakAoP4wmUy6rV+Upg6Olan63Jc7j+mfX+xVeZXN0GwAzo9m/h6a2D/K9Xju+jQdyWf3I6Ah6x8TpDZh3prQO1LNAz2NjgMAANCgmBxnsGdCQUGBAgIClJ+fL39//9rIBQAAUG9V2e2yms2yOxwym0y//w2NWFpuqRYnZOiH5BzVfFPq72nVtV2a6cqOTSn3ot4pKK3Uq98l69OfjrrOuVlMuv2iaN3St6WsFtZbAgBQX+3LKtLj/93vmkB5Z7+W+mv7MINTAUD9smrfcf3r6wOu59Iukf56Znh7+Xmyew/QEL2xJlWrqqf5xoR46bnh7WQ1c20EaIhO3hOx2R2ymLk3AjQGN7+/TdnFlQrxcdPCmy4wOg4A1Dtn08vlUxQAAMA5dvJCNSXfX5eaU6IZK5L0t6U7tKZGydff06pbL2yh+WO7atQFTPBF/eTv5abHr2yn2Td2U/MgL0lSpc2h2fEpumneVu06UmBwQgAA8Ee1beqruwacmkz3+c5MlVUyiRIAzsaQtmF67poO8nJzXj/ZnlGgqct2KLuowuBkAM6H2/u2UGSAh7zdzLquSzNKvkADdvKeCCVfAACAc4+JvgAAAKg1qdklWvRjhtYdzD3tfKCXVdd1DdcVHZrI041yLxqOskqb5q5J1Qcb02Wr/uhlNkk39G6hSQNj5EWZHQCAeunfmw7rYHaxHrqklTytFm5kA8
AfsDezUA/9Z4/ySislSc38PfTGqM4K8/MwOBmAc+1IfpncrWYFebnxvgkAgAaksKxKdjlklokdOgDgDzibXi5FXwAAgHPAbrfLzDSKX5WSXaLFCRlal/K/Bd8RXcM1jIIvGri9Rwv1f1/s1f6sIte5yEBPTR/WVn1igw1MBgAA/gi73SGZJIeDaVUA8Gek55bqgY93KbOgXHFhPnpzdGd5WM0ysUsS0ODYHQ52QAMAAACAGij6AgAA1JK8vDwFBgaedo7S7ynJJ4q1OOGINqSeXvAN8nbTiK7hurx9GAVfNBpVNrve35Sut9ekqrzK7jp/ZZdm+vtfW8vbndXuAAA0JIWFhfL09JTJZJLVyus8APyaE0Xlen5Fkh75axv5eVhk+ZVrKiUlJSooKFCzZs1qOSGA8628vFwvvviiQkJCFBAQoBtuuMHoSADOkdLSUqWkpGjt2rVKS0vTzTffrLi4OO6hAAAA6Ox6uVxhBgAA+INmz56txMREff311+rfv79uvfVWXXLJJVygkrPguyghQxtT8047H+ztpuu7heuv7ZvIw8q/JzQuVotZ4/tFaUi7MD39xT79mJYnSVqfnC2Ho7UcDscvTq0qKSnRvn37dMEFF9RyYgAA8EdUVlbq/fff18svv6xu3bqprKxMy5YtMzoWANRZob4emnF1Bzn061PSp0yZooyMDKWnp2v06NF64IEHajckgPPC4XDoP//5j5544gn5+Pho1KhRev311yWJsi/QAHz22WeaN2+ekpOT5evrqz59+uiuu+7STTfdpFtvvdXoeAAAAPUKE30BAAD+gKVLl2r27Nl67LHHlJmZqfvuu095eXnq1KmTFi5cqE6dOhkd0RAHjhdrcUKGNh3KO+18sLebRnYL12UUfAFJzu0qP912VK9+l6xHr2irwe1CZf2FRQIff/yx5s6dq4qKChUWFurFF1/UwIEDDUgMAADORH5+vv75z3/qiy++0IwZM9SvXz9NmzZNwcHBmjVrltHxAKDeyc3N1cMPP6y1a9dq2bJlslgsGjVqlO655x7ddtttRscD8CfNmDFD//73v/W3v/1N99xzjyRpzZo1uvfee/XDDz/Ix8fH4IQA/qgZM2Zo1qxZmjRpkm655RZFRkZKkv773/9qxowZWrNmjcEJAQAAjMdEXwAAgPNswYIFevTRRzV06FBJ0tGjR9W5c2fFx8fr/fff14wZMwxOWLsOHC/Soq1HtLl6QulJIT5uGtktQpe1C5M7BV/AxWwy6druERrSLkwB3m6/+GcSExN19913a8aMGbrlllu0cOFCvfDCC+rfvz/bfwMAUEeNGzdO+fn5+u677xQVFSVJmj59ul5++WXZ7XZ2/wCAszRjxgzt2rVLq1evVmhoqCRpwoQJstvtBicD8GfNnz9fb775pj755BP17NnTdb6qqkrdu3en5AvUY6mpqVq3bp1WrFihtm3bnva1pk2bytvbW0VFRfL19TUoIYBz7fjx40pPT5cktWjRQmFhYQYnAoCGhyvLAAAAZ6mqqkpRUVHas2ePKisrJTmnbvr6+urRRx/Vjz/+qO3btxuc8txZtWqVFi9erMzMTEnOLfV+7sOE00u+oT7uumtAlN4Z01VXdWpKyRf4Fb9W8i0rK9Ozzz6rESNG6JZbbpEkXX/99crOztbhw4drMyIAADhDM2fO1IkTJ7R69WpXyVeSXnvtNdntdplMv7wdPQDgl3300UeaN2+eFi5c6Cr5StKcOXNc12MA1F9Hjx7VU089dVrJ99tvv9XNN9+sNm3aGJgMwJ8VHh6uvXv3KiIiwnUuPz9fc+fO1dixYzV69GhKvkADsX//fg0ZMkStW7fWjTfeqBtvvFGtW7fW0KFDlZSUZHQ8AGhQGAMFAABwFhwOh6xWq0aNGqWnnnpK69ev14kTJxQTE6P+/ftLklJSUtSiRQuDk/5527Zt07Rp01RZWam4uDi99957euutt/7nn81md+imXs218VCewnzdNb
JbuP7SLkxuFsq9wB/13//+V5s2bdKuXbtc595++21VVVUpOjradc7hcFAaAgCgjsjPzz9tG/mqqipNnTpV27dv13/+8x9eswHgLO3bt08PPfSQYmNjXeduvfVWdejQQX/7298knVqMzHMsUP8cPnxYhw8f1vjx4yVJ//d//6cVK1booYce0pQpUySJHRGAesrDw0O9evXS1KlT1bVrV23cuFGHDx+Wt7e3HnvsMY0bN87oiADOkfHjx2vixIlauXKl6zXbbrfrgw8+0C233KJ169YZnBAAGg6KvgAAAGfh5I2jIUOGqKKiQocPH1ZQUJAuvvhiSdKLL76odu3aKSgoyMiY58T8+fM1bNgwPfjgg0pKStLjjz8um832P8VCi9mkmBBv/d/lbdQl0p+CL3AO/Pvf/9Zdd90lb29vSVJ6erqWLVumBx54QJJks9lksVhkMplkt9u1f/9+tWvXzsjIAAA0ev7+/nrvvfc0YsQIZWRkaOrUqQoNDdWsWbMUGRlJUQUAzlJFRYX279/venzbbbcpNzdX//jHPyT9bwGQ51mgfnn11VfVq1cvTZgwQevXr1d4eLimT5+uiy66yPVnTv5Os9AZqH9efvllxcfHa+HCherVq5fGjx+vjh07qnnz5pL4vQYaiuzsbNeuhCeZzWaNGzdOTz/9tEGpAKBhougLAABwFpKSkvTkk0/q1Vdf1V//+tf/uYkUERGhqVOnGpjw3CgqKlJubq7r8XvvvadDhw4pNzdXQUFBCggIOO1CnMPhUI+WgQalBRqevn37nrZ93b333qvOnTu7bnZZLBZJ0saNGzVr1iylpaXJx8dHb7755mkTfwEAQO257777tHPnTo0fP167du3S1VdfrcmTJ6tly5aSThVV8vLytGbNGg0fPtzIuABQ5z3wwAMaPny4hg0bJpvNpvz8fM2fP19t27aVdOp5dfPmzVq1apVrkUVcXJyRsQGcIXd3d33++efKzMzUNddco+HDh/9iYT8pKUlfffWVa8ovgPqhWbNmGjNmjMaMGSPpf4u9lHyBhiEgIEDLly/X9ddff9r5jz76SIGBgcaEAoAGyuQ4ua/RbygoKFBAQIDy8/Pl7+9fG7kAAADqpEceeUQzZ87U6NGj9eSTT6ply5b1fmJMVlaWmjZtKunUlFC73a4vv/xSH374oVavXq0OHTpo8ODBWr9+vUJCQvTvf/+73v9zA3XZp59+qgcffFB/+ctfVF5erg0bNuibb75RRESETCaT63f15Zdf1kcffaR169bp9ddf19tvv633339fXbp0MfofAQCARqu4uFgVFRW/ucvHbbfdppCQED3//PO1mAwA6qePPvpIzZo1U//+/SWdPrm3qqpKEyZM0LFjxzR06FC99957mj9/vnr27GlkZABnadeuXerYseOvfn3YsGEaOnSo7r///lpMBeDPWrRokUpKSnT77bf/4td3796tDh061HIqAOfS7t27NX78eKWnp7sWOqelpalFixZ699131b59e4MTAkDddja9XJoZAAAAZ+H777/XF198oaqqKk2cOFH79u07bQs5u91ucMIz98UXX6hfv3666aabdN9996miosI1JdRsNuuqq67SHXfcoYEDB+rbb7/V9OnT9fTTT2v16tXKyMig5AucR1dffbXi4+Pl6empyy+/XCtXrlRkZKRr0oXZbFZxcbEcDodiY2MlSVOmTFGfPn2UmZlpZHQAABo9Hx8frV+/XqtXr/6fr538vDBz5kwlJCTo3//+dy2nA4D6Z8SIEerfv7+rLHTyekRubq4SExNVVVWll156SQ8++KDuuOMO/fOf/1RZWZnBqQGcjaVLl+r777//n/NVVVWSpLfeeksfffSRtmzZUtvRAPwJl1xyiVq3bv2rX1+4cKG+/PLLWkwE4Fzr0KGDNm/erHXr1um5557Tc889p3Xr1mnz5s2UfAHgHKOdAQAAcIZWrlwpX19fDRkyRDNnzlRUVJSuvfZaff7556
4/U1/Kr8ePH9dzzz2nadOmaeHChdq5c6emTJniuhF2ctOHgoIChYSEKD8/X5LzJtqAAQPY5QGoBREREXrxxRd13XXXKSsrS2+++abra2+//bamTJmiPXv2aN++ferWrZsmT56slJQUVVRUSDr1ewwAAGpfq1atlJ6e7iqnSKdPoPTz89M777yjRx99lMIKAJyhkpISJSQkSHJO+X3ooYf0+OOPKycnRzfccIN27twpSQoLC5PVajUyKoCz9Oijj6pz586SdNogBavVqoqKCrVo0UJ/+ctfNGnSJNlsNqNiAjhLTZo00cCBAyWd+t222+2uz0nt2rXTggULDMsH4M+74447JEmxsbEaNGiQBg0a5BpOAgA4t+pHEwUAAKAO+O677zRy5EhJUkhIiObMmaNbbrlFb7zxhlasWOGatFkfrFmzRk2bNtX111+v8PBwvfPOO9q6davi4+PlcDhc/yxlZWVKSUnRM888o1mzZmnatGlq3bq1/Pz8DP4nABqXtm3bqmnTpsrKylJBQYHWrVundu3a6Z133tHmzZvVvXt3RUREaMGCBbryyislyfV7/NNPP2nr1q1GxgcAoNFp166dRo0adVrR7GTJ97vvvtPChQs1adIkeXh46PXXX1dlZaVRUQGg3rj99tvVu3dvSVJiYqKsVqvefvttff311+rTp49uvvlm7d27V126dJHValVKSopWrVplcGoAZ8Ld3V2hoaGSnO+ZSktLVVpaqk8++UT/+te/dNVVV2nZsmVyOBz65JNPDE4L4I84+XnIbDa7PieNHz9ehw8fVl5enoHJAPwZV199tdERAKDRYEkzAADAGRo+fLh69OghSbLZbLJYLLrvvvskSVdccYWWLl2qa665xsCEZ65du3ZatWqVqqqqZLVaFRUVpUGDBumjjz7S0KFD5ebmJkm69tprFRERoffff18JCQlauHChOnXqZHB6oPHx9PTU9ddf73pcVVWl5s2bux53795du3fvdp3Lz8/XunXrtHPnTq1cuVIrV65UQkKCLrjgglrPDgBAY+Xu7u46Xrt2rbZv366PP/5YR48eVefOnXXhhRfq3XffldVqdb3/BgD8tpPPrcePH1fr1q0VGRkpSRo6dKiaNWum//u//5MkVVZWKikpSQ8//LCuvPJKPfnkk4ZlBnDmysrK9NRTT2nXrl0qLy/Xtm3bdOutt6pbt25666235O3trcDAQKNjAjgLDodD8+bN0+HDh9WqVSu5ubnp8OHDat++vfz9/ZWVlaVly5a5poICqF9ODh4BAJx/JscZ7OdaUFCggIAA5efns00zAADAL/jyyy/Vq1cvNWnSxOgoZ8ThcOiKK67QwIED9dBDD0mSkpOT1b9/f+3fv1/+/v764Ycf1LlzZwUGBqqyspLyAVCHzJ49W7NmzdKjjz6qoKAgPfLII7rtttt000036auvvtLChQsVGRmpv//977rzzjsVEhKiZcuWGR0bAIBG6dVXX9UDDzygCRMmKDQ0VPfee6/c3NxcJZXMzEwdPXpUFotFXbp0MTYsANQTX3/9taZNm6YXX3xR0dHReuKJJ+Tn56d58+ad9ufy8vI0ZswY9enTR0888YQxYQGclREjRuiyyy5TWFiYLr/8cnl6erq+dvToURUWFqqwsFBxcXEKCAgwMCmAM3XTTTfJZrPJZrNp79696tq1q9LS0lRSUiI/Pz/FxcVp7ty5RscE8AdUVFTozTff1MGDB3XzzTerZ8+e+vzzz+VwODR8+PB6tRsqABjhbHq5FH0BAAD+BLvd7tpyqq5zOBynfaD+6KOP9PDDD2vv3r2yWCySpMGDB+vNN99UVlaWEhISdPfdd8vLy8uoyAB+w8aNG/XEE08oPDxcTZs21YwZM/Tggw/q66+/1uuvv65BgwZp9erVGjp0qNLS0hQREfE/zwMAAOD8czgcOnr0qJo2bep6333yNXnjxo1auHChVq9ercjISPXr148iGgCcodWrV+uhhx5SVF
SUcnNz9dlnn8nT01Pl5eXy8PBQRUWF3N3d9d1332nu3LlavHhxvbmGA+CUkzur5eXlaebMmVq1apU8PT1ltVr13//+1+h4AM5CYWGh/Pz85HA4VFhYKH9/f+Xl5cnd3V3e3t5GxwPwB9x5551KTU3VkCFDtGrVKnXs2FGrV6+Wh4eH+vXrp5deesnoiABQp1H0BQAAgIvD4dD2IwXq0MxPbpbTb2j99a9/VWhoqP7+979r2bJl2rp1q7788ktufAH1yMmb2JJz+9opU6YoJSVFM2fO1HXXXadHHnlEU6ZMqVcLEwAAaIhOvhbXXHjzwAMPKDU11TWRskePHnr66ad1+eWXG5wWAOqHEydOyNfXVyaTyfW5qKbk5GQtWbJEBw8e1BtvvCEPDw8WPwL1SM33Td9++63uuecevfLKK7r88st1++23KyQkRM8995zBKQGciV/6PHTy3NGjR5WcnKwBAwYYnBLA2erQoYMSExPl5uamwsJChYeHKzMzU97e3urcubN27dpldEQAqNPOppdrraVMAAAA9dqhQ4fk6+srT09PmUwm1+ry8vJyrVixQldccUWdu1HkcDiUkJ6vRQkZ2nesWLf1aaGrOzeTxXwq54IFC7RgwQLdddddatWqlV5//XWKgEA9c/Jmtt1uV1hYmD788EOtWLFCV199tcrKyjRlyhRJ4ncbAACDmc1m10Q6SSorK1NhYaGmTp2q6OhoSdJll12mlJQUA1MCQP0SGhoqSUpNTdXSpUuVmZmp4uJi5eXlqaioSAcOHFC/fv30l7/8RZ6enganBXA2fr4rUUZGhq6++mrXgqgbbrhBb731lkpKSuTl5VXnrs0CON3Ja5M1f1dPnispKdE999yjH3/80ZBsAP44d3d3ubm5SZL8/Pzk7+8vHx8fmUwmWa1U0gDgXOJZFQAA4Hc88sgj2rlzpzZs2KAuXbpo8ODBuuqqq9S1a1ft27dPP/74o6688kqjY7o4HA5tScvX4oQM7T9e7Dr/UeJRXdWpqSw6dSEtPDzcNe3Tz8/PiLgAzhGz2azKykq5ubkpNjZWOTk5uuyyy7R48WKNHj2aoi8AAHWAxWJRQUGBiouLFR4ersjISL3wwgtq3769jh07pr1792rgwIFGxwSAeqdZs2ZaunSpDh48qLfeekvu7u7y9PRUaGio4uLiFBAQYHREAGfJZDLp8OHD+u6773TLLbfoggsu0LPPPqtbbrlF3t7eWr16tXr06OEayACg7qu549jJRZB2u12tWrVSaGio1q5dy1RfoJ5p1qyZnnvuOV111VV69913FRkZqYcfflj+/v6uRXkAgHPD5HA4HL/3h85mRDAAAEBDkpCQoPHjx2vFihXy9/fXZ599pk8//VTr1q3TNddco9dee00VFRVyd3c3Omp1wTdPixKO6ECNgq8kRQd7aWyPSPWLCWK6BdAITJo0Sb6+vnrxxRdVXl5+2ha2DodDxeU2+Xqy7hMAACM8/fTTWrVqlVatWiVJevPNN7Vjxw7Nnz9fAwYM0IoVK1xTfwEAZ85ut2vixIny9vbWq6++anQcAOfArl27dPnll2vNmjWKjo7WV199pbVr12r27Nlyc3PTV199pe7duxsdE8BZKisrc03aP3ntcvbs2UpMTNScOXMMTgfgbKSmpuquu+7Szp07NWjQIL311lv6xz/+ocOHD+uZZ55RbGys0REBoE47m14uRV8AAIDf8I9//ENZWVl66623Tju/e/duPfbYY7r//vvVv39/g9I5ORwObTqUp8UJGUo6UXLa12JCvHVD9wj1jQmSmYIv0Kj8fItLSbLZHVqy5bD+vf6QHrysjS5pH0b5HwAAAwwfPlytWrXSJZdcotzcXN15550aPXq0FixYIOmXX8cBAGdmwoQJstls+ve//81zKdAAvP7661q8eLFGjhwpPz8/TZ8+XZ07d9YHH3yg8PBw3jcB9ciSJUv097//Xdddd53S0tJUVFSkCy64QIcOHVJ5eb
nWr1+vI0eOsDMZAABoNCj6AgAAnCOJiYl6+umn9fDDD6tHjx6nfW3KlCny9vbWc889Z0g2h8Ohjal5WvxjhpJ/VvCNDfHW2B6RujA6kIIvAEmS3eHQiaIKjZi1SaWVNknSRa1D9PDlbdTU39PgdAAANC7FxcV6/PHHlZOTo6qqKvXt21d33323JEq+AHAufPbZZ7riiitOm5B+8nYYz7FA/bNgwQIlJyeroKBAMTExuu+++yQ5J3lTCATqj8zMTK1atUrp6ekqKChQdHS0kpOTVV5erjZt2mj16tWaMWOGoqOjjY4KAABQKyj6AgAAnCOVlZV64oknNGfOHPXu3VujRo3SiBEjlJWVpeuuu04LFixQz549azWT3eHQxtRcLUo4opTs0wu+rUKrC75Rgdy4AvA/8koq9PSX+xS/74TrnI+7RVOGttJ13SNYGAAAQC2y2WyyWCynbVsLADi/XlyZpL4xQerfKsToKAD+gKqqKlmtVkksjgIAAABQ/1H0BQAAOMcOHz6sDz/8UJ9//rm2b9+uIUOGqFOnTnryySdrLYPd4dCGlFwtTshQSk7paV+Lqy749qbgC+AMfLfnmJ7/5oCyiypc5y5oEaBHr2yn6BBvA5MBANB4/FI5xe5wyGwyqcpml9XCdDoAOJfeWpuqRVsyZDFJD1wap2EdmxodCcCfRNkXqL9qTuT+tWMAAICGjqIvAADAeXDywnFBQYEKCgrUvHnzWvl77Q6H1h3M1eIfM3ToZwXfNmE+uqFHpHq1DOCiNoCzUlBaqVe/S9anPx11nXOzmHT7RdG6pW9LykUAANQym92hKrtDM+NT5G41696B0bzHB4BzxGZ36F9f79d3NXY3mTggSjf0jOS5FqiHbHaHzCZp2+ECdW8RYHQcAAAarcKyKtnlkFkm+XlajY4DAPUORV8AAIAGwFnwzdHihCM6lPuzgm8TH93YI1I9WlDwBfDnbEnJ1dNf7lVGXpnrXFwTHz1+ZTt1jODzHwAAtaXKbtdjX+zX/uPFkqSbekbquq7NDE4FAA2H3eHQm6tTtHzbqcWOI7tH6G8XR8vMtRWg3nA4HCooq9JL36do59FCPTgkVn1jgoyOBQBAo3Tz+9uUXVypEB83LbzpAqPjAEC9cza9XEY0AQAA/A67w+H6X22w2R1anZStu5ft1IyVyaeVfNs18dGTl7fRy9d0UM+WgZR8AfxpvWKCtGRib43r20Lm6qeUpGPFunVBgl5ZkaTSCpuxAQEAaCSsZrOu6XJqG/kPtmZoS1qecYEAoIExm0yaPDBGd/SPcp1b9uMRPfP1AVXa7AYmA3A2TCaTtmUUaOfRQknSm2sP6VhhucGpAPwRJ++7nMFsOgAAgEaPoi8AAMBvsNkduvej3Vqw6bDSfjZV93z8XfEHTuhvy3bo+e+ST/v72jf11VPD2upFCr4AzgNPN4umDo3Twgk91aapryTJ7pA+2JSuMXM3a9PBHIMTAgDQOPSJDtKY7hGSJIekV+JTzvvnEABoTEwmk27q3VwPXtLKtdBxxd7jeuTTPSqtZJEjUF8MbBWs/tVTfEsqbHolPkVVdoqCQH1zOK9Mi348okf+u182focBAAB+E0VfAACA3/BTRoEO55fpy13HtKzG1o7nks3u0PfVBd8XVh3U4bwy19c6NPPV01e01QtXt1f3FgEUfAGcV+3C/bRwQg9NHhIrD6vz42JGXpnuXpSoJz/bo/zSSoMTAgDQ8I3s1kz9qosrZZV2PbsiSYVlVQanAoCG5crOzfR/V7aTu8V5nWXzoTxNW76TzzxAPWEymTRpQJSa+rlLkvYdK9aSH48YnArA2fp05zF9uy9bGfnl2plZaHQcAACAOo2iLwAAwG+IP5DtOh4YF3JOf7bN7tCq/Sd019IdevFnBd+Ozfz0ryva6vnh7XVBcwq+AGqP1WLW+H5RWnxnL3VvGeg6//n2TI2cs0krdh9jOz0AAM4jk8mkKRdHKzbEW5KUVVihF1YlM6
UOAM6xi+JC9OJ1HeXrYZEk7c4s0uSlO5RVUG5wMgBnwsfdovsGxaq6r6+PEzO1/UiBsaEAnJV+MYGu43UpucYFAQAAqAco+gIAAPyK4gqbNqflSZL8Pa26oLn/Ofm5NrtDK/cd16Ql2/XS9weVkX+q4Ns53E/PXNlOzw1vp24UfAEYqGWwt+aM66ZHh7WVT/WN75ziSk3/eJemLd2hrIKy3/kJAADgj/KwmvXwJa0U6GWVJO08WqR5G9INTgUADU/X5gF6bWRnBXu7SZLSckp195LtSs0uMTgZgDPRpomPxvaMlCQ5JM2MT1Eek7mBeqNruJ9rwU3C4QKVVNgMTgQAAFB3UfQFAAD4FRtSclVpc07NGhAbJDfLn3vrZLM7tGLfcU1csl2vxKfoSI0JMZ0j/PTsVe00Y3h7dY30p+ALoE4wm0y6tnuElk+6UIPahrrO/3AgW6PmbNbyhAzZme4LAMB5EerrroeGtpLV7Pxs8M3e4/p6z3GDUwFAw9MqzEezxnRRZKCnJOl4UYUmL9mhnUwGBeqFqzs3VbdI54CGvNIqvbEmlWsVQD1htZjVJypQklRpc2hLer6xgQAAAOowir4AAAC/Ij4p23U8MC7kD/+cKptd3+49rjuXbNfM+BQdrVHw7Rrhr+euaqcZV7VXl4hzMzEYAM61MD8PvTiys54b0VEhvu6SnFPPZ3y1XxMXbmPaFQAA50nbpr66a0CU6/G8DWnaebTQwEQA0DCFB3jqzdGd1baJjySpsLxK0z7apY0pOQYnA/B7zCaTplwcrQBP504IPx4u0Oc7jxmcCsCZ6h8d5Dpel5JrYBIAAIC6jaIvAADALzhWWK7dmUWSpMgAT8WFep/1z6iy2fXNnmO6c8kOvbo6RZk1Cr7dIv31/PD2euaqdupEwRdAPTG0fRMtm9hbV3cLd53blp6vsXO3aN7aVFXZ7AamAwCgYRrcOkTDOzWVJNkc0vPfJZ/22QIAcG4Eebtr5shO6tEiQJJUXmXXI5/u0Te7KQwCdV2Qt5umDoxxPf5ga4aSjhcbmAjAmYoN8VK4v4ckae+xYh0vqjA4EQAAQN1E0RcAAOAXrE46NbFlUOtgmUymM/7eSptdX+85pjs+3K7X1qQqq/DUTfgLmvvrhavb619XtlPHcL9zmhkAaoO/l5sev7KdZt/YzbW1bYXNrtnxKbpp3lbtYntbAADOuXG9ItW9uXOBYFG5Tc+uSFJJhc3gVADQ8Hi7WzXjmg4a3Ma5s5PNIT3zzQEtScgwOBmA39Otub+u6excHFVld+jl71N4vwTUAyaTSf1jTk313ZDKVF8AAIBfQtEXAADgZxwOh+KTsl2PL24VckbfV2mz66vdzoLv62tSdazGyvPuzQP04tXt9fQV7dShGQVfAPVfr5ggLZnYW+P6tpC5ei1E0rFi3bogQa+sSFIpN9MAADhnLGaT7hscq8gA56Sr9LwyzVydIpvdYXAyAGh43K1mPX55W13btZnr3Kw1qZq9JlUOB8+7QF02tmekWof5SJIyC8v11vo0fm+BeqBfdKDreG1KLr+3AAAAv4CiLwAAwM/sP16so9Vb4XYK91OYr/tv/vlKm11f7srSHR9u1xs/pJ62tVTPFgF66ZoOeuqKtmpPwRdAA+PpZtHUoXF6d0JPtWnqK0myO6QPNqVrzNzN2nQw53d+AgAAOFM+7hZNvzROPu4WSdLWtHwtTjhicCoAaJgsZpOmDo7VrX1buM59mJChGd8mqYpFFkCdZTWbNG1wjLzdnLfAf0jO0fcHsn/nuwAYLdTHXe2bnCzpV+hgdqnBiQAAAOoeir4AAAA/szrpVDFtUFzwr/65SptdX+zK0u2Lt2vW2kOnF3xbBujlazvoyWFt1a66/AYADVX7cD8tnNBDk4fEysPq/JiZkVemuxcl6snP9ii/tNLghAAANAwRAZ66f0isa5r+x9sztSaZhTUAcD6YTCaN79NS04bEqvppV1/vPqbHPtujskp2MA
HqqqZ+Hpo0IMr1+O0N6crIKzMwEYAz0T8myHW8LiXXwCQAAAB1E0VfAACAGiptdq2tnkDpbjGpb3TQ//yZiiq7Pt+ZpdsWJ2r22kM6UXyq4Nu7ZaBeubaDnry8rdo2oeALoPGwWswa3y9Ki+/spe4tA13nP9+eqZFzNmnF7mNsuwcAwDnQLdJft154asLkrB9SdeB4sYGJAKBhu7pruJ68sq3cLM6674aUXN3/0S4VlLGgEairBsQG65I2oZKk8iq7Xvr+oCqq7AanAvBberUMkPvJ19pDeaqy8TsLAABQE0VfAACAGhLS81VU7pzKcmF0kLyqt8WVnBeFP9uRqdsXJ2rOukPKLj51Q+fCqEDNvK6j/nl5G7Wh4AugEWsZ7K0547rp0WFt5ePhfA7NKa7U9I93adrSHcoqYIoOAAB/1rAOYa7ySoXNoRkrk5VTYwEiAODcGtg6VM9f20He1deJdh4t1D1Ld+pYYbnByQD8mtv6tlDzQE9JUmpOqRZuOWxwIgC/xcvNou7NAyRJxRU2JR4pNDgRAABA3ULRFwAAoIb4pFPb3g6KC5bkLPh+Wl3wfWt9mrJLThV8+0QH6tXrOuoff22j1mE+tZ4XAOois8mka7tHaPmkCzWobajr/A8HsjVqzmYtT8iQnem+AAD8YSaTSXf0a6EOTZ2LDHNLKjVjZbLKmVQHAOdN9xaBenVkJwV5u0mSUrJLNHnJDh3KKTE4GYBf4mE16/7Bsa5p3P/dfVybD+UZGwrAbxoQc2qHxbUpuQYmAQAAqHso+gIAAFQrLKvSj+n5kqQgbze1beKjT7Zn6rbFiZq7Pk05NQq+faOD9NqIjnr8sjaKo+ALAL8ozM9DL1zfSc+N6KgQX3dJzokcM77ar4kLtyk1mxviAAD8UW4Wsx4cGquw6tfYpBMlmrX2kBwspgGA86ZNE1+9MbqzIgKcU0KzCss1ZckO7clk6iBQF0UFe+nWC1u4Hr/xQ6pOsAsCUGd1bOarAE+rJOmnI4UqKq8yOBEAAEDdQdEXAACg2tqDOaqyO+RwONTE1113fLhD72xIU26Ngm//mCC9fn0nPXZZa7UKpeALAL/HZDJpaPsmWjaxt67uFu46vy09X2PnbtH8tamqsjF9EACAPyLAy00PX9JKnlbnZd4fknP0yfYsg1MBQMPWPNBLb4zu7Fr4nV9WpfuW79SWQ0weBOqiy9qF6sKoQElSUblNM79Pkc3OwiigLrKYTeobHShJstkd2pSWb2wgAACAOoSiLwAAQLVV+7NVZberwmbX9iMFyiutUfCNDdIb13fSI39prdgQbwNTAkD95O/lpsevbKfZN3ZTZKBz+lWFza5Z8SkaN2+rdh0pMDghAAD1U0yIt+4ZGO16/MHWDG1JyzMsDwA0BiE+7np1ZCd1a+4vSSqttOuh/+zRyr3HDU4G4OdMJpPuvihKoT7OXRB2ZxVp2U9HDU4F4NcMiAlyHa89yCIaAACAkyj6AgCARq+00qb5G9O0O6vwtGkOJkkXxQbrzZGd9MilrRVDwRcA/rReMUFaMrG3xvVtIbPJee7AsWLduiBBr6xIUmmFzdiAAADUQ32igzSme4QkySHplfgUpeWWGhsKABo4Xw+rnr+2oy6OC5HknDz41Ff7tXzbEYOTAfg5Xw+r7hsc47oOsfyno9p1tNDYUAB+UcsgL7WoHhKQnF2izIJygxMBAADUDRR9AQBAo1VSYdPSbUc0YVGiPkrMPO1rF7dyFnwfvjRO0cEUfAHgXPJ0s2jq0Di9O6Gn2jT1lSTZHdIHm9I1Zu5mbTqYY3BCAADqn5Hdmqlf9fSrskq7nl2RpMKyKoNTAUDD5mE164kr2uqqzk1d516PT9Hb6w7J4XD8xncCqG3tm/q6FkbZHc6FUbxXAuqm/jWm+q5LZaovAACARNEXAAA0QicLvrctTtS7mw+roMYFXYvJpBlXtdNDl8QpioIvAJxX7cP9tHBCD00eHCt3i/PjaUZeme5elK
gnP9uj/NJKgxMCAFB/mEwmTbk4WjEhXpKkrMIKvbAqWVV2imYAcD5ZzCbdP7SVbr6wuevc+5sP64WVPAcDdc21XZqpU7ifJCmnpFKv/5BKKR+og/pGBcpUPYF7XUqu7PyeAgAAUPQFAACNR0mFTR/+eEQTFv10WsHXJMlsMsnNYlavqEB1jvA3NigANCJWi1nj+0fpw4m91L1loOv859szNXLOJq3YfYybbgAAnCEPq1nTL4lToJdVkrTzaJHmbUg3OBUANHwmk0m39YvS1MGxqu4l6cudWfrnF3tVXmUzNBuAUyxmk+4dGC1/T+d7pa1p+frv7uMGpwLwc0HeburUzLkL2IniSh04XmJwIgAAAONR9AUAAA1ecXmVFidk6NZFP+m9LYdVWO68wWI2SUNah+iiVsFys5hlNpk0KC7E4LQA0Di1DPbWnHHd9OiwtvLxsEiScoorNf3jXbp/2U5lFZQZnBAAgPoh1NddDw1tJavZWTX7Zu9xfb2HAgsA1IbruoXrH8PauJ6D1ybn6MGPd6uwxm5SAIwV7OOuyRdFux6/u/mwDp6gRAjUNf1jglzHa1NyDUwCAABQN1D0BQAADVZReZUWJWTo1kWJen9rhopqFHyHtgnRnFGdNfniGO04UihJ8nIzq3dUoIGJAaBxM5tMurZ7hJZPulCD2oa6zq/Zf0Kj5mzW8oQMtuoDAOAMtG3qq7sGRLkez9uQpp1HCw1MBACNx5C2YZpxTQd5uTlvwSVmFGjqsh3KLqowOBmAk3q2DNBVHZtIkqrsDr0cf1CllUzfBuqSHs0D5Gl1vpZuTstTRZXd4EQAAADGougLAAAanKLyKn2w9bAmLErUB1szVFxxquB7SZtQvTW6i6YNbqXIQC9tSctTSaXzAlHf6CB5WHl7BABGC/Pz0AvXd9JzIzoqxMddklRcYdOMr/Zr4sJtSs1m0g4AAL9ncOsQDe/UVJJkc0jPf5eszIJyg1MBQOPQKypQM6/vpAAvqyQp+USJ/rZkuw7nlhqcDMBJN/WKVGyItyTpSH653tmQbnAiADV5WM3q1TJAklRaade2jAKDEwEAABiLJgsAAGgwCsur9P4WZ8F3UcKR0wq+l7YN1dzRXXTf4FhFBHi6vif+QLbreFDrkFrPDAD4ZSaTSUPbN9GySb11dbdw1/lt6fkaO3eL5q9NVZWNSR4AAPyWcb0i1b25vySpqNymZ1ckqaSCaXUAUBvaNfPTm6O7qJm/hyQps6Bcdy/ZoX1ZRQYnAyBJbhazpg2OkWf19O3vD2RrdVL273wXgNrUPybIdbw2JdfAJAAAAMaj6AsAAOq9wrIqvVdd8F3846mCr8Vs0l/ahWnumC66d1CswmsUfCUpt6RSP1WvAg/1cVeHZr61nh0A8Nv8vdz0+JXtNPvGbooMdD6PV9jsmhWfonHztmr3EaZ5AADwayxmk+4bHKvIAGfJLD2vTDNXp8hmdxicDAAahxZBXnpzdGfFVE8NzSut1NRlO5SQlmdsMACSpIgAT03s19L1+K31aTpaUGZgIgA1tWvio2BvN0nSjqOFyi+tNDgRAACAcSj6AgCAequgrFLvbk7XrYt+0oc/HnFNprKYTbqsXZjmju6iqQNjFO7v+Yvfv/Zgjk7e3x4YFyyzyVRb0QEAZ6lXTJCWTOytcX1byFz9dH3gWLHGL0jQzJVJKmU6IQAAv8jH3aLpl8bJx90iSdqalq/FPx4xOBUANB6hvh56fVRndY5wTlgvrbTrof/s1vf7TxicDIAkDYwL0eDqnd7KKu16+fsUVbKDEFAnmE0m9Y8OlCTZHdKGQ3mG5gEAADASRV8AAFDv5JdW6t1N6ZqwKFFLtx1VaaXzwqvVbNJf24fp7TFddM/AGNfWiL8mvsZWbAPjQs5rZgDAn+fpZtHUoXF6d0JPtWnqnMJud0jvb0zXmLmbtelgjsEJAQComyICPHX/kFjXYpmPEzO1JpnXTQCoLX6eVr00ooP6xTq3IK+0Of
Tkl/v0SeJRg5MBkKTb+7ZQRPUOCMknSvT+1gyDEwE4qV9MkOt4fUqecUEAAAAMRtEXAADUG/mllVpwsuD70+kF32EdmujtMV005eIYNfX77YKvJB3KKVVKdqkkKS7UW80Df3nqLwCg7mkf7qeFE3po8uBYuVucH2sz8sp096JEPfnZHrbxAwDgF3SL9NetF7ZwPZ71Q6oOHC82MBEANC4eVoueuqq9hnVsIklySJq56qAWbEiTw+EwNhzQyHm5WTRtUKys1auiPt95TAnp+QanAiBJkQGeign2kiSl5pbqcF6ZwYkAAACMQdEXAADUeXmllZq/MU0TFiVq+U9HVVZ1quB7RYcmeueGLrr7omg1OYOC70mra0zzHdSaab4AUN9YLWaN7x+lDyf2UveWga7zn2/P1Mg5m7Ri9zFulgMA8DPDOoTpkjahkqQKm0MzViYrp7jC4FQA0HhYzSb9/dI43dgr0nXu3xvT9fKqg7LZ+fwCGCk21Fu39G7uevz6mlTeJwF1RP8aU33XpeQamAQAAMA4FH0BAECdlVtSqXkbnAXfjxIzTyv4Xtmxiebd0FV/uyhaYb5nXvCVJJvd4dqm1mKSBsQGn/PsAIDa0TLYW3PGddOjw9rKx8MiScoprtT0j3fp/mU7dayg3OCEAADUHSaTSXf0a6H2TX0lOT9zzViZrPLqz1oAgPPPZDLpzgHRmjwwxnXus+2ZevK/+1TB8zFgqGEdwtSzZYAkqaCsSq+uTqWED9QBfaICZXEO3Nb61DzZ+b0EAACNEEVfAABQ5+SUVOidDWm6bXGiPt6e6brp7GYx6apOTTVvbFfdNSBaob7uf+jn7zhaqJwS57bu3VsEyN/Tes6yAwBqn9lk0rXdI7R80oUa1DbUdX7N/hMa+dYmLU/IkJ3pvgAASJLcLGY9ODRWoT7Oz1NJJ0o0e+0hJuEDQC0b2T1Cj/21tSxmZ3Np9YFs/f0/u1VcXmVwMqDxMplMmnxRtIK93SQ5ryN/sj3T4FQA/D2t6hLhJ0nKLa3U7mNFBicCAACofRR9AQBAnZFTXKG56w/ptkWJ+qRGwdfdYtLwTk0174aumtQ/ynVD+o9afSDbdTwoLuRP/SwAQN0R5uehF67vpOdGdFRI9WtFcblNM77ar4kLtyk1u8TghAAA1A2BXm6afmkreVqdl4fXJOfok+1ZBqcCgMbn0vZN9OzV7V3Px9vS8zV1+U7lFFcYnAxovPw9rbpvUIyqO/j68Mcj2ptFqRAwWv+YINfxuoO5BiYBAAAwBkVfAABguOziCs1dd0i3LU7UpzuyVGFzTpJyt5h0dWdnwXdi/yhXaevPKK20aeOhPEmSj7vFtRUbAKBhMJlMGtq+iZZN6q2ru4W7zm9Lz9fYuVs0f22qqmxshwsAQEyIt+4ZGO16/MHWDG1JyzMsDwA0VhdGB+mV6zu5dpw6cKxYk5fs0JG8MoOTAY1Xx3A/XV99TcHukF7+PkVFTNsGDNUt0l/ebhZJ0tbDBSqrtBmcCAAAoHZR9AUAAIY5UVyhOScLvjtPFXw9rGZd26WZ5o3tqjv7RSn4HBR8T9qYmueaFDwgNkhuFt4OAUBD5O/lpsevbKfZN3ZTZKCnJKnCZtes+BSNm7dVu48UGJwQAADj9YkO0pjuEZIkh6RX4lOUlltqbCgAaIQ6hPvp9VGd1cTPeQ0sI79Mdy/ZrgNsTQ4YZmS3cHVo6ivJeR171tpDcjgcBqcCGi93i1kXRjkHt5RX2ZVwmGt7AACgcaHZAgAAat2JogrNXpuq2xcn6vOdWar8ecH3hq66vW9LBXufu4LvSauTsl3HA+NCzvnPBwDULb1igrRkYm+N69vCte3mgWPFGr8gQTNXJqm0gukfAIDGbWS3ZupXvQ1uWaVdz65IUmEZE+sAoLZFh3jrzdFdFBXsJUnKKanU1GU7tS093+BkQONkMZt076AY+bo7J4huTM3Tt3tPGJwKaNz6V39ukaS1KbkGJgEAAKh9FH
0BAECtOV5Urlk/pOq2xYn6Ytex0wq+I7o20/yxzoJvkLfbefn7TxRXaMeRQklSMz8PtW3ic17+HgBA3eLpZtHUoXF6d0JPtamexmN3SO9vTNeYuZu16WCOwQkBADCOyWTSlIujFRPiLJZlFVbohVXJqrIzsQ4AalsTPw+9PqqzOoT7SZKKK2z6+ye7tOZA9u98J4DzIdTXXZMvjnY9nr8pXYdy2P0AMErrUG818XUOiNmdWaSckgqDEwEAANQeir4AAOC8O1ZYrjd/SNXti7fry93HXDeMPa1mXd8tXAvGdtWEPi0V6HV+Cr4nrUnK0clb1YNaB8tkMp3Xvw8AULe0D/fTwgk9NHlwrNwtzo/DGXlluntRop78bI/ySysNTggAgDE8rGZNvyROgV5WSdLOo0WavzHd4FQA0DgFeLnp5REddWG0c2phhc2hf365V5/vyDQ4GdA49Y4K1OUdwiRJlTaHXvr+oMqr7AanAhonk8nkmurrkLQhNc/QPAAAALWJoi8AADhvjhWW6401Kbrjw+36b42Cr5ebWSO7hWv+2K669cIWCjjPBV9JcjgcWp10avrJxa1CzvvfCQCoe6wWs8b3j9LiO3upe8tA1/nPt2dq5JxNWrH7mBwOJhgCABqfUF93PTS0laxm54LIr/cc1zd7jhucCgAaJy83i54Z3k5/ae8sF9od0osrk7VwUzqfVwAD3NKruaKDnbsfHM4rY0EUYKB+0YGu47UpubwuAgCARoOiLwAAOOeyCsv12mpnwferPcdPK/iOusBZ8B1fSwXfkw5mlyg9r0yS1L6pr5r5e9Ta3w0AqHuiQrw1Z1w3PTqsrXw8LJKknOJKTf94l+5ftlPHCsoNTggAQO1r29RXdw2Icj1+Z0Oadh4tNDARADReVotZ0y9rrdE9Ilzn5q1P06vxKbJTagJqlbvVrGmDY+Vhdd5aX7HvhNYezDE4FdA4NfXzUOswb0lSRn65DuWWGpwIAACgdlD0BQAA58zRgjK9Wl3w/Wbv6QXfMd0jtGBsN93Su4X8PWuv4HtSfNKpC6+D4oJr/e8HANQ9ZpNJ13aP0PJJF2pQ21DX+TX7T2jkW5u0PCGDG+gAgEZncOsQDe/UVJJkc0jPf5esTBbAAIAhzCaT/nZxjCZddGoRxic/HdVTX+1Xpc1uYDKg8Wke6Knb+7ZwPZ6z9pCyCnmPBBhhQEyQ63hdSp5xQQAAAGoRRV8AAPCnHS0o08z4g7rzw+36du9x2aoLvt7uFlfBd1yv5vLztBqSr8ru0A/JzqKvm8WkfjUuAgEAEObnoReu76TnRnRUiI+7JKm43KYZX+3XxIXblJpdYnBCAABq17hekbqgub8kqajcpmdXJKmkwmZwKgBovG7o2VzT/9JaFpPz8ap9J/Twf/aopKLK2GBAIzOkdYguauUcIlFSadfL36e4hl0AqD29WwbIana+KG44lMfvIQAAaBQo+gIAgD/sSH6ZXvneWfBdse+ETl5L8XG3aGyPCM0f29XQgu9JPx3OV0GZ88ZHr5aB8vEwNg8AoO4xmUwa2r6Jlk3qreFdw13nt6Xna+zcLZq/NlVVTMwCADQSFrNJ0wbFKDLAQ5KUnlemmavZKh4AjPTXjk309PD28rA6b+1tTcvTvct3Ka+k0uBkQONhMpk0sV9LNfNzvkc6cLxYixMyDE4FND4+7lZdEOlcmFhQVqWdRwsNTgQAAHD+UfQFAABnLSO/TC9/n6yJS7Zr5f7TC7439ozU/LFddWPP5vKrI4Xa+KQc1/GguGADkwAA6jp/Lzf946p2mnVjV0UGekqSKmx2zYpP0bh5W7X7SIHBCQEAqB0+HlZNvzROPu4WSdLWtHwtSjhicCoAaNz6xQbr5REd5evhfG7el1WkyUu362h+mcHJgMbD292i+wbHuCZsf7I9Sz8d5loBUNsG1Ni5cV1KroFJAAAAagdFXwAAcMYy8kr10qpkTVqyXd/tz3YVfH09LLqpZ6QWjO2qsT
0i5VtHCr6SVFxepS1peZIkf0+rujUPMDYQAKBe6B0TrCUTe2tc3xaq3glQB44Va/yCBM1cmaRSti8HADQCEQGeun9IrOu18OPETK1JzvntbwIAnFedIvz1+qjOCvVxlySl55Zp8pIdSj5RbHAyoPFoHeajm3o1dz1+bU2K8kqZrg3Ups4RfvKrXvjy4+ECFXOtDgAANHAUfQEAwO9Kzy3VC98la9LSHVp14FTB18/DonG9mmvB2G66oUekfOpQwfek9Sm5qrQ5A18UGyzryTvUAAD8Dk83i6YOjdO7E3qqTVNfSZLdIb2/MV1j5m7WpoMUnQAADV+3SH/demEL1+NZP6TqwHHKZABgpNhQH705prNaBnlJkk4UV+iepTu0PYOpokBtuapTE13Q3F+SlFdapddWp8rucBicCmg8rGaT+kQFSpIq7Q7XwBcAAICGiqIvAAD4VWm5pXr+uyTdtXSH4pNOFXz9Pa26pXdzzR/bTWO6R8i7eivXumh10qkS1qDWwQYmAQDUV+3D/bRwQg9NHhwrd4vzY3RGXpnuXpSoJz/fo3ym9gAAGrhhHcJ0SZtQSVKFzaEZK5OVU1xhcCoAaNya+Xvq9dGd1a56UWJRuU33f7RL65KzDU4GNA5mk0lTLo5WoJdz+MVPGQX6dEeWwamAxqV/TJDreF1qnnFBAAAAagFFXwAA8D/Sckv13Mok/W3pDq1OytHJOQQnC77zbuiqURfU7YKvJGUWlGt3VpEkqUWgp2JDvA1OBACor6wWs8b3j9LiO3upe8sA1/nPEzM1cs4mrdh9TA4m9wAAGiiTyaQ7+rVQ++oyWW5JpWasTFZ5ld3gZADQuAV6uemV6zupV/VEwwqbXY9/vlf/3UXZEKgNgV5umjowRif3kFu0NUP7j7HzAVBbYoK9FOHvIUnad6xYx4tYjAgAABouir4AAMAlNadEM1Y4C75rkk8v+N56YQvNH1s/Cr4nrakxwWRgXIhMJtNv/GkAAH5fVIi35oy7QI8MaysfD+frYU5xpaZ/vEv3L9upYwXlBicEAOD8cLOY9eDQWIX6uEuSkk6UaPbaQyx0AQCDebtb9OzV7TW0rXPyus0hPfdtkhZtOcxzNFALukb669quzSQ5f/9eiT+o4gqbwamAxsFkMp0+1Tcl18A0AAAA5xdFXwAAoNTsEj2z4oDuXrZTPxw8VfAN9LJqQp8WWjC2q67vFi4vt/pR8JUkh8Oh+AM5kiSTpItbBRsbCADQYJhNJl3XPULLJ12oQdU30yVpzf4TGvnWJi1PyJCdG+oAgAYo0MtN0y9tJU+r87LymuQcfbKdqZEAYDQ3i1mPXd5GI7qFu869tfaQ3lyTymcToBaM6R6htk18JElZhRWaw2IooNb0jQ50TdVel5LL7x4AAGiwKPoCANCIpWSX6JlvD+ju5Tu17uCplc6BXlbd1qeF5t3QVSO6hsuzHhV8T9p/rFiZhc6pip0j/BTq625wIgBAQxPm56EXru+k50Z0VEj1dMPicptmfLVfExduU2p2icEJAQA492JCvHXPwGjX4w+2ZmhLWp5heQAATmaTSVMGxej2/i1d55b9eETPfH1AVTa7gcmAhs9qNum+QTGunfDWpeTqu/3Zv/NdAM6FUB93tW/qK0nKKqpQMtfjAABAA0XRFwCARij5RLGe/uaAJi/fedpWRkHebrq9b0vNu6GrrqunBd+T4pNOXUgdGBdiYBIAQENmMpk0tH0TLZvUW8O7npqetS09X2PnbtH8tancVAcANDh9ooM0pnuEJMkh6ZX4FKXllhobCgAgk8mkcb1b6MFLWslcPd5wxd7jmv7ZHpVW2owNBzRwTfw89LcBUa7H72xI0+E83h8BtaF/TKDreG1KnmE5AAAAzieKvgAANCLJJ4r11Df7dc9Hu7Qh9VTBN9jbTXf2cxZ8r+3SrF4XfCWp0mZ3TSj2sJrVJyrQ2EAAgAbP38tN/7iqnWbd2FWRgZ6SpAqbXbPiUzRu3l
btPlJgcEIAAM6tkd2aqV9MkCSprNKuZ1ckqbCsyuBUAABJurJzM/3fle3kbnG2fTen5mna8p3KL600OBnQsPWLCdJf2oVKkipsDr20KkXlVSz+Bc63ni0CXK95mw7lqZJF9wAAoAGi6AsAQCNw4Hix/u9rZ8F3Y2qe63ywt5sm9mupd27oqqs7N5OHtWG8NUhIz1dRhXNKSZ+oQHm51+/iMgCg/ugdE6wlE3trXN8WrglaB44Va/yCBM1cmaTSCqZoAQAaBpPJpCkXRysmxEuSlFVYoRdWHVSV3WFwMgCAJF0UF6IXr+soXw/ndbHdmUWasnSHjhWWG5wMaNhuvbCFWlQvAD6UW6p3Nx82OBHQ8Hm5WdSzRYAkqbjCpsQjhQYnAgAAOPcaRpsHAAD8ogPHi/TkV/t178e7tOlQnut8iI+bJvWP0rwbump4Ayr4nhR/INt1PDAuxMAkAIDGyNPNoqlD4/TuhJ5q09RXkmR3SO9vTNeYuZu16WCOwQkBADg3PKxmTb8kToFeVknSzqOFmr8x3eBUAICTujYP0GsjOyvY202SdCinVH/7cLtSs0sMTgY0XB5Ws+4fEuuaLvr1nuPaWGN3PQDnR7/oINfxuhR+5wAAQMPTsFo9AABAkrTvWJH++dU+3fvxbm1Oy3OdD/Vx110DovTOmK66qlNTuTewgq8kFZRVKSE9X5IU5O2mzhF+BicCADRW7cP9tHBCD00eHCt3i/M1NyOvTHcvStSTn+9h21wAQIMQ6uuuh4a2ktV8qszyzZ7jBqcCAJzUKsxHs8Z0UWT1hNHjRRWasnSHdh4pMDgZ0HC1DPLShD4tXI/f/OGQjhdVGJgIaPg6NvN1LUD86UihCsurDE4EAABwbjW8dg8AAI3Y3qwi/fO/+zTtk93ampbvOh/m666/DYjSOzd00ZUdG2bB96R1B3Nkq94pdmCrYFlO7psOAIABrBazxveP0uI7e6l7ywDX+c8TMzVyziat2H1MDgdbnAMA6re2TX1114Ao1+N3NqRp51G2ywWAuiI8wFNvju6sNk18JDkXyk/7aJc2prDbCHC+XNo2VP1inBNGiytseiX+oGx2Pv8D54vFbFLf6EBJks3uOG2XSwAAgIag4bZ8AABoRPZkFurxL/fp/v/s1tb00wu+ky+K1ttjuuiKjk3lZmn4L/3xSdmu44FxIQYmAQDglKgQb80Zd4EeGdZWPh4WSVJOcaWmf7xL9y/bqWMF5QYnBADgzxncOkTDOzWVJNkc0vPfJSuT1zcAqDOCvN018/pO6t7CuQCxvMquRz7do292HzM4GdAwmUwm3dW/pcJ83SVJe7OKtWTbEYNTAQ1b/+gg1/G6lDzjggAAAJwHDb/tAwBAA7Y7s1CPfblXD3y6Rz8ePlXwbVKj4Ht5hyaNouArObdDP3C8RJIUE+ylqGAvgxMBAHCK2WTSdd0jtGzihRrYJtR1fs3+Exr51iYtT8iQnem+AIB6bFyvSF3Q3F+SVFRu07MrklRSYTM4FQDgJB8Pq567poMGtXYujrc5pGe+OaAlCRkGJwMaJh8Pq6YNitHJTec++ilTO46w6wFwvrQM8lLLQE9JUnJ2iY4WlBmcCAAA4NxpHK0fAAAamF1HC/XoF3v14Kd7tO1wget8Uz933XNxtOY2soLvSatrTPM9ecMCAIC6pom/h14c2UnPjeioEB/nZJ/icptmfLVfk97bptTsEoMTAgDwx1jMJk0bFKPIAA9JUnpemWauTmEhCwDUIe5Ws/4xrK2u6drMdW7WmlTNXpMqB8/XwDnXtqmvxvaIlCQ5JM1cnaL80kpjQwENWP8YpvoCAICGqXG1fwAAqOd2HinQI5/v1d8/26OfMmoWfD00dWCM5o7uosvaN76CryTZHQ7FJ+VIkswmaUBssMGJAAD4dSaTSUPbN9GySb01vGu46/yPafkaO3eL5q9NVZXNbmBCAAD+GB8Pq6ZfGicfd4skaWtavhYlsE01ANQlFrNJ9w
6O1a19WrjOfZiQoRnfJqnKTtkXONeu6dJUXSL8JEm5JZV644dDFOuB86RvdKBM1VO016fmsugQAAA0GI2vBQQAQD20/UiBHv58jx76fK8Sj5wq+Ib7e+jegTGaO7qz/tIuTNZGWPA9aXdmkU4UV0iSukX6K8jbzeBEAAD8Pn8vN/3jqnaadWNXRVZvLVhhs2tWfIrGzduq3TVe9wEAqC8iAjx1/5BY1zbVHydmak1yjrGhAACnMZlMGt+3paYNiVX107W+3n1Mj322R2WVNkOzAQ2N2WTSPQNj5O9plSQlpOfri13HDE4FNEyBXm7q3MxZrD9RXKn9x4oNTgQAAHBuNN42EAAAdZzD4VBiRoEe/myPpn++VzuOFLq+FuHvofsGxeit0V10aSMv+J4UfyDbdTwoLsTAJAAAnL3eMcFaMrG3xvVt4SpFHThWrPELEjRzZZJKK7jRDgCoX7pF+uvWC09Nipz1Q6oOHOcmOwDUNVd3DdcTV7SVm8X5QWRDSq4e+HiXCsuqDE4GNCzB3m66Z2C06/F7WzKUfIL3RsD50D8m0HW8NiXXuCAAAADnEK0gAADqmJMF34c+26tHvtirHUdrFHwDPHT/4FjNGd1Fl7QNk+VkE6iRK6+ya0Oq82KNt5tZvaICjQ0EAMAf4Olm0dShcXp3Qk+1aeorSbI7pPc3pmvM3M3adJBJiACA+mVYhzBd0iZUklRhc2jGymTlVO/EAgCoOwa1CdXz13SQt7tFkrTjSKGmLN2h40XlBicDGpbuzQN0deemkqQqu0Mvf5/Cwl7gPOjePECeVmcVZnNaviqq7AYnAgAA+PMo+gIAUEc4HA5tO5yvhz7bo0e+2KtdmacKvs0DPZ0F31FdNKRNKAXfn9l8KE+llc4LNX1jguRh5S0OAKD+ah/up4UTemjy4Fi5V0/tz8gr092LEvXk53uUX1ppcEIAAM6MyWTSHf1aqH31ApbckkrNWJmscm60A0Cd071loF4d2UlB3m6SpJTsEt394Q6l5ZQYnAxoWMb2iFBcqLck6WhBueauTzM4EdDweFjN6tUyQJJUVmXXj4cLDE4EAADw59GCAQDAYA6HQz+m5+vBT/fosS/3aVdmketrzQM99eCQWM0a2ZmC72+IT8p2HQ+KCzEwCQAA54bVYtb4/lFafGcvda++MSFJnydmatSczVq5+5gcDoeBCQEAODNuFrMeHBqrUB93SVLSiRLNXnuI1zEAqIPaNPHVG6M7KyLAU5KUVViuyUt3aE+NgQQA/hw3i1nTBsfKy815m351co6+P5D9O98F4GwNiAlyHa9NyTUwCQAAwLlB0RcAAIM4HA4lpOfpgf/s1uP/3ac9WacKvi0CPfX3oa00a2RnDWpNwfe35JZUKjHDuRo7zNdd7Zv5GpwIAIBzJyrEW3PGXaBHhrWVj4dzG93s4go9/PEu3b9sp44VsJUuAKDuC/Ry0/RLW7m2z12TnKNPtmcZnAoA8EuaB3rpjdGdFRfmI0nKL63Sfct3asshSlLAudLM30OT+ke5Hr+9Pk1H8ssMTAQ0PG2b+Cikekr9zsxC5bFDFgAAqOco+gIAUMscDoe2puXp/v/s1j/+u197jxW7vhYV5KWHhrbSmyM7a2BcCAXfM/BDco7s1YOgBrYKltnEvzMAQMNiNpl0XfcILZt4oQa2CXWdX7P/hEa+tUnLEzJkZyoiAKCOiwnx1j0Do12PP9iaoS1peYblAQD8uhAfd706spO6NfeXJJVW2vXwf/Zo5d7jBicDGo6LWgVraBvn7nRlVXa99P1BVdrsBqcCGg6zyaR+1VN97Q5p46E8YwMBAAD8SRR9AQCoJQ6HQ5sP5WnaJ7v1z6/2a1/Ngm+wlx6+JE5vjOykiyn4npX4pFPbmg2MCzEwCQAA51cTfw+9OLKTnhvRUSHV258Xl9s046v9mvTeNqVmlxicEACA39YnOkhjukdIkhySXolPUVpuqbGhAAC/yNfDquev7aiL4oIlSV
V2h576ar+WbzticDKg4bitTwtFBnhKklKyS7VwS4bBiYCGpX90oOt4bQqT6QEAQP1G0RcAgPPMWfDN1X2f7NaTX+/X/uOnCr7RwV565NI4vXF9J13ENNqzlppTotQc503h1mE+igz0NDgRAADnl8lk0tD2TbRsUm8N7xruOv9jWr7Gzt2i+WtTVcUEIABAHTayWzPXZK2ySrueXZGkwrIqg1MBAH6Jh9WsJ69opys7NXWdez0+RW+vOyQHu4oAf5qnm0XTBsfIzeK8L/DlrmPseACcQxEBnooN8ZIkpeWWKT2PRYYAAKD+ougLAMB54nA4tDE1V/d+vEtPfn1AB2oUfGNCvPXIpXF6/fpO6h9LwfePWp2U4zoeVD1dBACAxsDfy03/uKqdZt3Y1bXQpcJm16z4FI2bt1W7jxQYnBAAgF9mMpk05eJoxVTfcM8qrNALqw6qyk5hDADqIovZpAcuaaWbL2zuOvf+5sN6YWUyz93AORAT4q3xvU/9fr2xJlXZxRUGJgIalv7RQa7jdSl5xgUBAAD4kyj6AgBwjjkcDm1IydXUj3fpqW8OKOnEqW20Y0O89dhfWuu1ER0p+P5JNrtDa5KdRV+r2aT+sRR9AQCNT++YYC2Z2Fvj+rSQufptxYFjxRq/IEEzVyaptMJmbEAAAH6Bh9Ws6ZfEKdDLKknaebRQ8zemG5wKAPBrTCaTbusXpXsGxejk1cwvd2bpn1/sVXkVnzmAP+uv7cPUOypQklRYbtPM+BTZKNID50SfqEBVD83WhtRc2fndAgAA9RRFXwAAzhG7w6H1KTma8tEuPf3tASXXKPi2CvXW45c5C759Y4Io+J4DO44UKrekUpLUvUWA/D2tBicCAMAYnm4WTb0kTu9O6Kk2TX0lSXaH9P7GdI2Zu1mbDub8zk8AAKD2hfq666GhrWStXqny9Z7j+mbPcYNTAQB+y4gLIvT4sDau5+61yTl68OPdKiyrMjgZUL+ZTCbdfVGUQn3cJEm7Mov0UeJRg1MBDYOfp1VdI/0lSbmlVdqdVWRwIgAAgD+Goi8AAH+S3eHQuoM5umf5Tv3r2ySlZJ8q+MaFeusfl7XWq9d1VJ/oIJko+J4z8UnZruNBcUzzBQCgfbifFk7oocmDY+VucX7cz8gr092LEvXk53uUX1ppcEIAAE7Xtqmv7hoQ5Xr8zoY07TxaaGAiAMDvGdo2TDOu6SAvN+dnjsSMAk1dtkPZRRUGJwPqNz8Pq+4dFOParWfptqPancn7IuBc6B8T5Dpem5JrYBIAAIA/jqIvAAB/kN3h0A/JOZq8fKeeWZGklJxS19fahPnon39to5nXddSFFHzPudIKmzamOi/G+Lpb1KNFgMGJAACoG6wWs8b3j9LiO3upe8tTr4+fJ2Zq1JzNWrn7mBwOtigEANQdg1uHaHinppIkm0N6/rtkZRaUG5wKAPBbekUF6pXrOynAy7nDVvKJEt29ZLsO55b+zncC+C0dmvlp1AXhkpw79bwSn6LCciZmA39Wtwg/+bhbJElb0/NVWmkzOBEAAMDZo+gLAMBZchZ8szV52U7NWJmkQzULvk189OTlbfTytR3UOyqQgu95siE1VxU2Z0mpf2yw3Cy8pQEAoKaoEG/NGXeBHhnWVj4ezhsZ2cUVevjjXXpg2U4do0AFAKhDxvWK1AXNndvpFpXb9OyKJJVWcPMdAOqy9s389OboLmrm7yFJOlpQrruX7NB+tkQH/pQRXcPVsZmvJCm7uFJvrkllwS7wJ7lZzLqwekF8hc2hren5BicCAAA4e7RiAAA4Qza7Q6uTsnX3sp2asTJZh2pMqGh3suB7TQf1bEnB93xbnZTjOh7UOtjAJAAA1F1mk0nXdY/QsokXamCbUNf51ftPaORbm7Q8IUN2bhYCAOoAi9mkaYNiFBngLIul55Vp5uoUXqcAoI5rEeSlN0d3VkyItyQpr7RSU5fv1I9pecYGA+oxi9mkewfFyK960e7mtHx9vee4wa
mA+q9/TJDreF1KnnFBAAAA/iCKvgAA/A6b3aH4Ayf0t2U79Px3yUqrUfBt39RXTw1rqxcp+Naa40UV2nm0UJIU7u+hNmE+BicCAKBua+LvoRdHdtKMER0V4uMuSSout2nGV/s16b1tSs0uMTghAACSj4dV0y+Nc22puyUtX4sSjhicCgDwe0J9PfT6qM7qHOGczF5SYdPf/7Nb8ftPGJwMqL9CfNw15eJo1+N/bz6sFD67A39KXKi3mvo6r4vtySpSdnGFwYkAAADODkVfAAB+hc3u0PfVBd8XVh3U4bwy19c6NPPV01e01QtXt1f3FgEUfGvRmuRsnZzpNDAuhH/3AACcAZPJpEvaN9GySb01vGu46/yPafkaO3eL5q9NVZXNbmBCAACkiABP3T8kVubqj3kfJ2ZqTXLOb38TAMBwfp5WvTSig/rFOqclVtoceuLLffpP4lGDkwH1V8+WgbqiYxNJzt+pl78/qLJKm8GpgPrLZDK5pvo6JK1PzTM0DwAAwNmi6AsAwM/Y7A6t2n9Cdy3doRd/VvDt2MxP/7qirZ4f3l4XNKfgW9scDodWJ526yTswLtjANAAA1D/+Xm76x1XtNOvGrooM9JQkVdjsmhWfopvnJ2j3kQKDEwIAGrtukf669cIWrsezfkjVgePFBiYCAJwJD6tFT13VXpdXFxMdkl5ZdVALNqTJ4XD89jcD+EU394pUTIiXJCkjv1zzNqYbnAio3/rFBLqO16fm8voEAADqFYq+AABUs9kdWrnvuCYt2a6Xvj+ojPxTBd/O4X565sp2em54O3Wj4GuY5BMlruJ1h6a+aurnYXAiAADqp94xwVoysbfG9Wnhmpq4P6tI4xckaObKJJVWMCUIAGCcYR3CNLRNiCSpwubQjJXJymFrXQCo86xmkx66NE5je0a6zv17Y7peXnVQNjtlKuBsuVnMun9wrDytzlv63+3P1g/sdgD8YU18PdQmzFuSszyfmltqcCIAAIAzR9EXANDo2ewOrdh3XBOXbNcr8Sk6UlDu+lrnCD89e1U7zRjeXl0j/Sn4Giw+Kdt1PLB1iIFJAACo/zzdLJp6SZzendBTbZr6SpLsDun9jekaM3ezNh3k5iEAwBgmk0l39mup9tWvT7kllZqxMlnlVXaDkwEAfo/JZNLEi6J198Bo17nPtmfqyf/uUwXP48BZiwjw1B39Wroez1l3SJk17mEAODv9Y4Jcx+sO5hqYBAAA4OxQ9AUANFpVNru+3Xtcdy7ZrpnxKTpa4+JY1wh/PXdVO824qr26RPgbmBInVdkdWlt90cXNYlK/GhdjAADAH9c+3E8LJ/TQ5MGxcrc4LxNk5JXp7kWJevLzPcovrTQ4IQCgMXKzmPXg0FiF+rhLkpJOlGj22kNsrwsA9cSo7pF69K+tZaneQmT1gWz9/T+7VVxeZXAyoP4Z3DpEA+OCJUmllXa99P1BVdoozgN/RO+WgXKrfm3acChPVUycBwAA9QRFXwBAo1Nls+ubPcd055IdenV1ymmr37tF+uv54e31zFXt1ImCb52y7XC+CsqcNwJ6tQyUj7vF4EQAADQcVotZ4/tHafGdvdS9ZYDr/OeJmRo1Z7NW7j5GsQoAUOsCvdw0/dJWru2q1yTn6JPtWQanAgCcqb+0b6Jnhrd3PY9vS8/X1OU7lVNcYXAyoP65s29Lhft7SJKST5RoUcIRgxMB9ZOPu0UXNHfe/ysst2nH0UKDEwEAAJwZir4AgEaj0mbX13uO6Y4Pt+u1NanKKjxV8L2gub9euLq9/nVlO3UM9zMwJX5N/IFs1/GguBADkwAA0HBFhXhrzrgL9MiwtvLxcC6qyS6u0MMf79IDy3bqGNuDAgBqWUyIt+6psf37B1sztCUtz7A8AICz0ycmSC9f30n+nlZJ0oFjxZq8ZIeO5JUZnAyoX7zcLZo2OEbW6kmkn+7I0o+H8w1OBdRP/WvsGLkuJdfAJAAAAGeOoi8AoMGrtNn11W5nwff1Na
k6VnRqYkT35gF68er2evqKdurQjIJvXVVUXqWt6c6LlgGeVnVrzrRlAADOF7PJpOu6R2jZxAs1sE2o6/zq/Sc08q1N+ighQ3am+wIAalGf6CCN6R4hSXJIeiU+RWm5pcaGAgCcsY7hfnp9VGeF+bpLkjLyy3T3ku1KOl5scDKgfmkV6qNxvSJdj19bnaqckkoDEwH1U+dwP/lVL3DfdrhAxRU2gxMBAAD8Poq+AIAGq9Jm15e7snTHh9v1xg+pOl6j4NuzRYBeuqaDnrqirdpT8K3z1qfkqtLmLBRd1CrYNbUAAACcP038PfTiyE6aMaKjQnycN+SLy2169qv9mvTeNqVmlxicEADQmIzs1kz9qidvlVXa9eyKJBWWVRmcCgBwpqJDvDVrTBdFBXtJknJKKnXP0h36iYmkwFm5smMT9WgRIEkqKKvSa6tTWIwLnCWr2aS+0c7PFpV2hzazYwgAAKgHKPoCABqcSptdX+zK0u2Lt2vW2kOnF3xbBujlazvoyWFt1a6pr4EpcTbik7JdxwPjQgxMAgBA42IymXRJ+yZaOqm3hncNd53/MS1fY+du0fy1qaqy2Q1MCABoLEwmk6ZcHK2YEGdBLKuwQi+sOqgqO8UWAKgvmvh56PVRndUh3Dl4objCpgc/3qU1Na79AfhtJpNJky+KUpC3myRp+5FC/Wd7lsGpgPqnf0yg63hdSq5xQQAAAM4QRV8AQINRUWXX5zuzdNviRM1ee0gnik8VfHu3DNQr13bQk5e3VdsmFHzrk8yCcu3Ncm7j1yLQU7HVN3UBAEDtCfBy0z+uaqdZN3ZVZKCnJKnCZtes+BTdPD9Bu48UGJwQANAYeFjNmn5JnAK9rJKknUcLNX9jusGpAABnI8DLTS+P6KgLqycpVtgc+ucXe/XFjkyDkwH1R4CXm+4dGKOT+94tSsjQvqwiQzMB9U10kJciAzwkSfuPl+hYUbnBiQAAAH4bRV8AQL1XXmXXZzsydfviRM1Zd0jZxZWur/WOCtTM6zron5e3URsKvvXS6hoTPQa1DpHJZPqNPw0AAM6n3jHBWjKxt8b1aSFz9Uvy/qwijV+QoJkrk1RWaTM2IACgwQv1dddDQ1vJWv1C9PWe4/pmz3GDUwEAzoaXm0XPDG+nv7QPkyTZHdILK5P13qZ0ORxMagfOROcIP43o1kyS83fo5fgUFZdXGZwKqD9MJpP6xwS5Hq9PyTMuDAAAwBmg6AsAqLfKq+z6tLrg+9b6NGWXnCr49okO1KvXddQ//9pGrcMo+NZXDodD8dVFX5Oki1oFGxsIAADI082iqZfE6d0JPdWmqfN9lt0hvb8xXaPf2qxNB3MMTggAaOjaNvXVXQOiXI/f2ZCmnUcLDUwEADhbVotZ0y9rrdE9Ilzn3lmfptfiU2Sn7AuckdEXRKhdUx9J0vGiCs1ae4iyPHAW+kUHuiZjr03J5fcHAADUaRR9AQD1TlmlTZ9sz9RtixM1d32acmoUfPtGB+m1ER31+GVtFBfmY2BKnAt7jxUrq7BCknNCQaiPu8GJAADASe3D/bRwQg9NHhwrd4vz8kJGXpnuXpSoJz/fo/zSyt/5CQAA/HGDW4foqk5NJEk2h/TCd8nKLGC7XQCoT8wmk/52cYwmXXRq8cbHPx3VU1/tV6XNbmAyoH6wmE26b1CsfN0tkqQNqXlase+EwamA+iPY213tqxexHyuqUNKJEoMTAf/f3n2HR1Xlfxz/3Jn0CemhhJYQOggIAkpHAdcOKogoishix4ZrWVdhi6zlp664oKiURVAQxZVV7DQRBJEmIDWhJAgppPeZ+/sjMBKTQAJJZibzfj1Pnidz5+bOd/Kcc+45937vOQAAVI5EXwCAxygotuujrUd153vb9Pa6QzpxWoJvn7hwTb+xs56+vI3io0jwrS9WnZzNV5IGtY50YSQAAKAiPlaLxvVtqfcm9lT3FqHO7cu2/qpRb2zQ1zuPMxsKAKDW3NazmS5sFiJJyi6065
9f71N+kd3FUQEAquvmi5rpiWGtZT05reK3u1P1xMe7lFdU4trAAA8QHeyne09Llp+9/rAOnch3YUSAZ+kXF+78fW3CCRdGAgAAcGYk+gIA3F5BsV0fbj2qO9/bqnfWH1bGabPD9W0Vrtdv7Kw/D2ujVpFBLowSNa2oxKG1B0ovqvj7WNQ7Nsy1AQEAgEq1jAzSG2Mv1FNXtpPNv3QmobTcIj3x0Q5N/uBnHWeGRQBALbBaDD0yKE5NQ/0lSYdOFOjVVSz5DgCe6IpOjfS3azo4Vwv58VCGHlqyQxl5rBQCnM3FseH6Q4doSVKR3dT/fXtAhSXMig1UxUXNQ+R38kmT9QczmVEeAAC4LRJ9AQBuK7/YriVbjmr8wq2avf6wMvJLZ3AwJPVvFaF/j+ysp4a2URwJvvXSj4czlXtyJqaLY8MU6Gt1cUQAAOBMLIah67vH6IO7emtg2yjn9lV7UjXyzR/04aYkEq8AADXO5u+jJ4e2lu3kktUbD2Vq4aZkF0cFADgXfeMj9PINnRR88uHB3cdydP/ibTqaWeDiyAD3d3uvZmoZHihJOpxRoDk/HHZxRIBnCPC16qLmpatU5RXbtSUp28URAQAAVIxEXwCA28krsmvx5mSNX7hVc344rMyC3xJ8B8SXJvg+MbS1YiNI8K3PVu1Lc/4+qHWkCyMBAADV0TDEXy+N7Kx/3tBJkTY/SVJuoV3Tlu/R3fM3KzEtz8URAgDqm5jQAD16aStZTi75/tHWX7V6f7prgwIAnJMLmoZo+qgLFHVyLHH4RIHuX7Rd+1NzXRwZ4N78fSx69NI4+fuU3v7/8pdUfZ9wwsVRAZ6hX1y48/e1idQbAADgnkj0BQC4jVMJvne+t1XzNhxR1mkJvgNbR2jGqAv0+JDWakmCb72XmV+snw5nSpIignzVuUkDF0cEAACqwzAMDenQUIvv7qVruzZxbv/pUKbGzNqo2d8lqoSlEAEANahb0xDd0bu58/WMNYnam0JSGAB4olZRNv179AVqHh4gSUrNLdKkxdu1LSnLxZEB7q1ZWKDuvPi0/tB3B3U8u9CFEQGeoWOjYIUH+kiStiZlKfvk/UkAAAB3QqIvAMDl8orsev+nZI1fuKVMgq/FKJ3JdeaoC/Sny1qrxcllp1D/fXfghOwnV/YeEB8h66lpmQAAgEcJDfTVM9e014xbuqppWOlN+iK7QzNWJui22Zu0M5kb9QCAmnNlx2hd1rZ0RZgiu6nnv96v9NwiF0cFADgXjUMC9PqoLmrfKFiSlFNo16Mf7tBaZmwHzuiytpHqe3J20rwiu15ZmaASh+niqAD3ZrEYuiS2tN7YTWn9wQzXBgQAAFABEn0BAC6TW1ii9zYl6Y6FWzR/4xFlF9ollSb4Dm5TmuD72GXxak6Cr9dZtS/N+fvA1pEujAQAANSEXnERWnRXL429uLlzWfU9x3I0bs4mvfr1PhUU210bIACgXjAMQxP7tFCHk0lh6XnFev6b/SosYRZ5APBEYUG+euXGzurZMkxS6UODf1m2S5/tOObawAA3ZhiG7u7XUo0a+EmSdh/P1fs/Jbs4KsD99TuZIC9JaxNPuDASAACAipHoCwCoczmFJVq4KUl3LNyqd39MUs5pCb6XtY3UG6Mu0ORL49UsjARfb3QkI1/7UvMkSa0ig9QygnIAAEB9EOBr1YNDWmve+IvU9mQClsOU3l1/WDe9uUEbEpiZCwBw/nytFj12WStF2UqTW/am5GnmdwdlmsxkBwCeKMjPqmnXddBl7aIklc60+PyX+7Rw4xHadqASNj+rHhncStaTD9ou3fqrtiaxog5wJs3CAtQyvHQ1qgNp+UrOLHBxRAAAAGWR6AsAqDM5hSVa8OMRjV+4VQt+TFJu0W8JvkPaRunNm7rokcHxakqCr1dbue+3JJ+BrSNcGAkAAKgNHZo00H/G99B9g1vJz1p6WSIpo0D3Ltiqqct2KTO/2MURAgA8XVigr54cGq8An9LzzOr96V
q6jdkfAcBT+VotevqKtrqhWxPntje/O6gZqxPlINkXqFCbaJtuuaipJMmU9K9VCcpgvA2cUd8ys/pmuC4QAACACpDoCwCoddmFJZq/8YjuWLhVCzcll0nwHdouSrNu6qKHB7dSTGiAiyOFqzlMU6v3pUkqLR/940n0BQCgPvKxWnRH35Z6b2JPdW8R6ty+bOuvGvXGBn298zizcwEAzktcZJAmDYx1vl7wY5I2HspwWTwAgPNjMQw9MChOE/q2cG5b/FOynvt8r0rsDhdGBrivay9opG5NQyRJGfklmk5yPHBGF7cMk+XkTNjfJ5ygvgAAALdCoi8AoNZkF5Qm+I5fuFXv/5SsvJMJvlaLoWHtozVrdBc9NKiVmpDgi5N2HM1Ram7prAIXNgtVWKCviyMCAAC1qWVkkN4Ye6GeurKdbP5WSVJabpGe+GiHJn/ws45nFbo4QgCAJ7s4Nlyju5fO/mhKemVlgg6dyHdtUACAc2YYhsb2aq7HhsQ7E7G++iVFT33yi/KL7a4NDnBDFsPQpIGxCgv0kSRtPpKlZT8fd3FUgPsKC/TVBU0aSJLS8oq1+3iuiyMCAAD4DYm+AIAal1VQrHkbDuuOhVvKJfhe3j5as27qogcHxqlJCAm+KGvlydl8JWlga2bzBQDAG1gMQ9d3j9EHd/XWwLZRzu2r9qRq5Js/6MNNScygAgA4ZyO7NVGfk0vwFhQ7NO2rfcouKHFxVACA83H1BY3116vby89amu37Q+IJPfLhDmXmF7s4MsD9hAX6atLAOOfrdzce0d4UkheByvSNDXf+/l3CCRdGAgAAUBaJvgCAGpOZX6y5PxzW+IVbtXjzUeUXly6Z5mMx9IcO0XprdBdNGhinxiH+Lo4U7qiwxKF1iaUXTYJ8LerZIsy1AQEAgDrVMMRfL43srH/e0EmRNj9JUm6hXdOW79Hd8zfrYFqeiyMEAHgiwzD0wIBYxUUGSpKOZRfpxW8PqMTBQyQA4Mn6t47Ui9d3ks2vdGWQnUez9cDi7TqezaogwO91axqiEV0aSZLspvTyigPOCVoAlNW9WYgCfUvTaDYeylRhicPFEQEAAJQi0RcAcN4y84s152SC7wdbyib4Xtmxod4a3UUPDIhTowYk+KJyPxzMUMHJstMnLkL+PnRTAADwNoZhaEiHhlp8dy9d27WJc/tPhzJ186yNmrP2oErs3GABAFSPv49FTw5p7Vy2+uej2Zq9/rCLowIAnK9uzUL12qgLFBHkK0k6mJ6ve9/fpkQeEgTKublHU7WJtkkqffDpzbUHZbJ6DlCOn49FvVqESpIKShz66UimiyMCAAAoRQYNAOCcZeQXa/b6Qxq/cKuWbDmqgpLfEnyv6thQb9/cRff1j1VDEnxRBSv3pjl/H9g6woWRAAAAVwsN9NUz17TXjFu6qmlYgCSpyO7Qv1cc0G2zN2lncpaLIwQAeJqoYD89flm8fCyly7x/vitFX+xKcXFUAIDz1Trapn/f1MU5bkjJKdIDi7frZ8YMQBk+FkOPDI5T0MmZStccOKFvT7smD+A3fWPDnb+vTchwXSAAAACnIdEXAFBtJ/KK9c660gTfD7f+WibB9+pODfXOzV11b/9YRQeT4IuqSc8r1raTF98bBvupQ+NgF0cEAADcQa+4CC26q5fGXtxcJ/OytOdYjsbN2aRXv96ngmKWGgUAVF27RsG6p19L5+u31x3Sz0ezXRgRAKAmxIQF6PVRF6hNw9LZSrMKSvTIhzu0PuGEiyMD3EujBv6/6wsd1pGMAhdGBLintg1tirKVzha//ddsZeQXuzgiAAAAEn0BANWQnlekt9cd0p3vbdVH235V4ckEX1+roWs6N9I7Y7rqnn6xigr2c3Gk8DRr9qfLcXKVsIGtI2UxDNcGBAAA3EaAr1UPDmmteeMvUttGpQ8DOUzp3fWHddObG7QhId3FEQIAPMngNpG6pnNDSZLdlF78Zr9+zSp0cVQAgPMVYfPTv27srO7NS5
dbLyxx6KlPdunLXcddHBngXvq2itDQdlGSSuvJ/604oKKT93oAlLIYhvqcnNXXNKV1iRmuDQgAAEAk+gIAqiA9t0izvj+oOxdu1dLTEnz9rIau7dxI79zcVXf3bakoGwm+ODcr9/22RNjA1hEujAQAALirDk0a6D/je+i+wa3kZy29nJGUUaB7F2zV1GW7lMnsKgCAKrqtZzNd2CxEkpRdDDvKKgAAQhxJREFUaNc/v96n/CJmiQcAT2fz99HzwztqYJtISZLdYeofn+/Vok1JLo4McC/jL26u5mEBkqSD6fmat/GIiyMC3E/fuHDn72uZIR4AALgBEn0BAJVKyy3SrLUHded7W/Xf7cdUZC+dctXPaui6C0oTfO/q21KRJPjiPCSk5elger4kqW20TTGhAS6OCAAAuCsfq0V39G2p9yb2VPcWoc7ty7b+qlFvbNDXO4/LNE0XRggA8ARWi6FHBsWpaai/JOnQiQK9uipBDs4hAODx/HwsevbKdhretbFz24zViXpjTSJjBeAkfx+LHhncSn7W0pX1lu9M0YaDGa4NCnAzTUL8FR8ZJEk6lFGgQyfyXRwRAADwdiT6AgDKSc0t0hunEnx//i3B19/HohFdGuudMV01sU9LRZDgixpw+my+g07OtgEAAHAmLSOD9MbYC/XUle1k87dKKn1I7YmPdmjyBz/rOEuwAwDOwubvoyeHtpbNr/Q8svFQphZuSnZxVACAmmC1GHpocCvdcXFz57b3fkzS81/tU4mDZF9AklpGBOqO3r/VkddXJyo1p8iFEQHup29cmPN3ZvUFAACuRqIvAMApNadIM79L1IT3tmrZz8dU/PsE35u7asIlLRQRRIIvaobdYWrN/nRJko/FUN9W4Wf5CwAAgFIWw9D13WP0wV29NbBtlHP7qj2pGvnmD/pwUxIzMwIAzigmNECPXtpKltLJ7PTR1l+1+uQYFQDg2QzD0LhLWuiRS1vpZDOv5TuO6y/Ldqmg2O7S2AB3Max9lC6ODZMk5RTZ9erKBNlJhgecercMk/XkYOH7xAzqBwAAcCkSfQEASskp1Iw1ibrzva36347jZRJ8b+jaWLPHlCb4hgf5ujhS1Ddbk7OUkV8iSerRPFQN/H1cHBEAAPA0DUP89dLIzvrnDZ0UeXLFidxCu6Yt36O752/WwbQ8F0cIAHBn3ZqGaFyvZs7XM9Ykam9KrgsjAgDUpOu6NtGUq9rJ13oyUevACU3+aIeyC0pcHBngeoZh6N5+LRV1ciy981iOPthy1MVRAe6jgb+PusU0kCRlFpRox685Lo4IAAB4MxJ9AcCLHc8u1L/XJGrCe9v06c7jzmXLAnwsurFbE80Z01XjL26hsEASfFE7Vu1Nc/4+qHWkCyMBAACezDAMDenQUIvv7qVruzZxbv/pUKZunrVRc9YeVInd4cIIAQDu7KpODXVZ29IxaZHd1PNf71d6LktXA0B9MahtlF4Y3lFBflZJ0vbkbD2weLtScgpdHBngesH+PnpkcJxzhYMlW45qx9Fs1wYFuJG+cb+tRLk24YQLIwEAAN6ORF8A8ELHswv1+uoE/fH9bfrstATfQF+LRnZrotljuuqO3s0VSoIvalFekV0/HMyQJAX7W9W9eYhrAwIAAB4vNNBXz1zTXjNu6aqmYQGSpCK7Q/9ecUC3zd6knclZLo4QAOCODMPQxD4t1KFRsCQpPa9Yz3+zX4UlPCQCAPVF9xZh+teNnZ2r1iWk5em+97frUDorgADtGwVrdPcYSZLDlF5ZmaAsZr0GJEldYxrIdvJBkU1HMpVfbHdxRAAAwFuR6AsAXuRYdqFeW1Wa4Lt8V0qZBN9RF5Ym+I4jwRd1ZF3iCRXZS8tgv1YR8rXSLQEAADWjV1yEFt3VS2Mvbu6clWjPsRyNm7NJr369TwXclAEA/I6v1aLHLmvlXLp6b0qeZn53UKZpujgyAEBNadsoWK/fdIGahPhLKr1efv/i7dr1K7OXAiO6NNYFTR
pIKn3o6fU1ifSDAJWOE3q3DJNUuvrHj4czXRsQAADwWmTUAIAXOJpVoH+dTPD94peyCb6ju8dozphuur1Xc4UEkOCLurNqX5rz90GtI10YCQAAqI8CfK16cEhrzRt/kdqenKHRYUrvrj+sm97coA0J6S6OEADgbsICffXk0HgF+JReNl+9P11Ltx1zcVQAgJrULCxQ/76pi1pH2yRJmfklenjJz9p4kOXY4d2sFkMPDoxVSICPJOnHQ5n6bGeKi6MC3EO/uDDn798lcL4AAACuQaIvANRjR7MK9OrKA5r4/jZ9+UuK7CcTfIP8rM4E37E9m6nByQs3QF05nl2on4/mSJJiQv3VJjrIxREBAID6qkOTBvrP+B66b3Ar+Z1cQSApo0D3Ltiqqct2KSu/2MURAgDcSVxkkCYNjHW+XvBjkjYeynBZPACAmhcZ7Kd/jeysrk1DJEn5xQ498fEuff0LSY3wbhE2Pz0wINb5et6GIzqQmue6gAA3ER8ZpEYNSlf++OVYrlJzi1wcEQAA8EYk+gJAPZScWaBXVpQm+H61O1Un83tl87NqTI8YzR7TlQRfuNTq/b/NoDcwPlKGYbgwGgAAUN/5WC26o29LvTexp7q3CHVuX7b1V418Y4O+3nmcJUkBAE4Xx4ZrdPcmkiRT0isrE3ToRL5rgwIA1Khgfx+9eH0n9YuPkCSVOEz9bfkeLdmc7OLIANfq0TxU13RuKKm0XvzfigPKL7a7OCrAtQzDUN/YcEml44N1iRkujQcAAHgnEn0BoB5JyizQyyv2665F2/T1nrIJvrdc1FSzx3TVLRc1UwN/EnzhOqZpatW+NOfrga0jXBgNAADwJi0jg/TG2Av11JXtZPO3SpLScov0xEc7NPmDn3U8q9DFEQIA3MXIbk3UJ670Zn5BsUPTvtqn7IISF0cFAKhJ/j4WTb26va7u3Mi5bfrKBL299iAPAsKr3XpRU8VHla7CdzSrUG+vO+ziiADX63tybCBJ3yWc4DwBAADqHJleALyaaZqy14NxWHJmvhZvPqrV+9Nkd/y2PdjfquEXNNa1nRvJRnIv6lhl9etAaq6SMkuTaDo1DlbDBv51HBnqE7tpiutp588wJCsza+McnG9fymJIFsoe6pjFMHR99xj1ax2p5z/fo1V7UiVJq/ak6seDJ3T/pfG6rlsTtyubVkOsgoDzVl/GwKejbqAm2B2mKqoa9/RrqaNZBUpMz9fxnCK9+O0BPTWstXwsdVvmKOdwNYdpOicUqEmUbdS2ql43eujSVgq3+WrBhiRJ0oKNSUrPK9akwa3qvM2vDsbUqC2+VoseGRynRz/epYJih1bsTVOXmAYa2DpSUuV9p98zVFpOaevhbs7lvkJ4kK/aNbRpT0qufs0u1L60PMVFBNVOgKKfBPd2+vUlw/jtx26a3GsCgFpkmFV41CgrK0uhoaHKzMxUSEhIXcQFALUqtaBEq5PzlJBdpGLH2ff3JKZpqrCgWM0CLBrTOVpBflZXhwQvszujUBuO5ys5t6TCi30WmUo9kafDSRma0LupLmsbVecxwrMVO0ytSs7VLxlFyqlvjbgLNfC1qEO4vwY0CXLrm1hwD/syi/TD8Twdyam4ra+OSH+rukT6q1fDQC5eo86Zpqmvd6Xo/Z9TFBMTJpvNfR9A8jGkuBA/DWgSpOhAHuJD9ezNLNQPx/KVVEkf3ZNZDallA1/1bRykpjZfV4cDD5Jf4tDK5FztySxSfol71ww/i6H4EF8NjLEpzJ/rPKgbDtPU2l/ztCO9UBlFtTP2PtW/6d8kSA3p36CGFNodWpWcp90Zhcp18/a9JkQGWNUlgjE1aseqfWn616pESVJwoI+u6dlSSfl2FVTj6UF/q6HWIX4aGBOkEO5XwYU87b4C/SS4o6TcYq09mqeDOcWVPkhu8zHULsxfA2OC5G9lkXkAOJvq5OWS6AvA62QV2TV3d4b8LIa6RAYoxK9+dTCLHab2ZRZpf1axrmgRrK6RAa
4OCV5kd0ahliZkq2Wwr9qF+cnPWvbismlKWcUObU7JV2ZBie7qFKGoIG7Go+pM09Ti/Vk6klusrpEBahjoI3JSz5/DlH7NK9G2tAK1CvHT9a3o86NyB7KK9MH+LDW1+ah9uL8CrOdeCe0O6VBOsXacKFSfRoEaEGOrwUiBqln7a57WHM1ThzA/xTbwk7tef84ucmhbeoEK7KbGtQtTKDdIUUX7M4u05ECWmgX7qH2Yv/zPo912R7nFDv2cXqjMIodubRvKDVBUicM0NW93hjKLHOoWGaDIAKvcNTfKNKXMIoe2pRVIksa1D1OQj5uerFCvLD+UrW1pheoS6a8Ym69q4/SRXeTQ9vRC5Zc4dHu7MBLZcd5M09SCvZlKybera1SAot24fa8JZcbUjQM1oAljatS86asTtXJfurp0bKLQID91jwpQuH/V6pZpShlFdm1NLZSvVRrXLoykL7jMon2ZHnVf4VQ/Ka/EoXH0k+AGjueXaP6eDIX7W9Up3F823/LtuWlKKQV2bU0tUHSgVbe0CeVBJAA4i+rk5XLlG4DX2ZFeqGKHqQntwxVUQQe0PugWGaClCdnaeDyfRF/UqY3H89Ui2FejW4ecceB2QYS/Zu44oUO5JST6olpSC+xKyC7WdbEN1CHcfWdd9ERdIqXGQT767FCOMgrtXDhEpX5MyVfjIB+NaRNaI0uEdo0KkM3Xok0pBerTmBmlUbfspqkfj+erR3SAhjYLdnU4Z9UtKkAzd5zQz+mF6tu49paHRP2yMSVfMTYf3dy6Ztptd3RhVKDe2nVCW1ILNKy5+9dluN6h7GIdy7drTJtQtQj2jDFpl8jScfQvJwrVPTrQ1eGgnssvcWhbWqEGxQSpd6Pa7XNcGBWgGSf7N/2a0L/B+UnOK9GR3BKNbBWi+FA/V4dTJ7pGBSjIx9CmlAL1bRQkK2Nq1LAJlzTXwdxi+fv76ubWoWoUVP30go7h/nprV4b2ZRapUwT3rFD3UvJLPPK+woVRAZq584S2pxeoPw9zwMW2pBYowGrR2LZh8j1Lf6NlsK8+OJClo3klimH1JQCoMfUzww0AziApr0TNbL71NslXkgzDUNswP6UW2FVod//lZ1A/mKap5LwStQ31O+vTmSF+VjUJ8lFSbkkdRYf6IvlkmWnjJTdr6lrbk//XpNxiF0cCd5acW6I2oX41mizWNtRPhQ5TaQX2GjsmUBUZhXbl201n++fuAn0sah7sQzuNaqmNdtvd+FkNxTXwdfYVgbNJyitRoNVQc5vnzIPBOBp16de8EpmS2obVfiJMgI9FLRr40r9BjUjOLTm51Ll3JZS0C/NXod1UWiFjatS8QF+r+rWJls3HOKckX0mKDPBRVICVfgxcxlPvKwT4WNQi2Je6A7eQlFusViG+Z03ylUr7Yj6GdISyCwA1qv5muQFAJewOU35uslTplClTZBiGUlNTa/zYfic72SXk+aIOOUxVWL/mzp0rwzCUmJjo3OZnNVRimnUYHeqDEtOUjyFm/DxPK1eulGEYWrlyZZntp+pvCVUTZ1BSzb7Uqf7OKbGxsRo3blyZfX4rexQ+1K1TfeXfLx1aUd/FXfhZDPr4qJYS03SOD09Xm+NRV2B8geo4dW2ooodUN27cqD59+shms8kwDG3ZsqXuA6yEn9WQnXKOOnCqPa3o/FEb/C204agZJQ5TvhajRh5wMgxDU6ZMcb52576Tc0ztoB6hdtgCfBRgPb+0Aj/aerhQiWnK6qH3FUqvA1F34HolZtXHBxbDkK/FkJ2yCwA1ikRfAPidGTNmyDAM9e7d29WhnJd6PFkT6gmKKOB+qJdwFcoe6ouFCxfq1VdfLbc9OTlZU6ZMqZFksbOtnAAAOHfFxcUaOXKk0tPT9corr2j+/Plq2bJllf72+++/15QpU5SRkVHuveeee04ff/zxecfHGQAAzs2Z2mhXcte4gIp4YH4kcFanHvT+8ccfXR1KpbgMBI9F2QWAGkeiLwD8zoIFCx
QbG6sNGzZo3759rg4HqBfGjh2r/Pz8Kt8gBVC7BgwYoPz8fA0YMMDVocAL7d69W2+99ZarwwBqxZkSfadOnepWs0ICAMrbv3+/Dh48qMmTJ2vixIm69dZbFR4eXqW//f777zV16tRaTfQFAJybM7XRlcnPz9fTTz9de0Hp3OICAAAAAHgnEn0B4DQJCQn6/vvv9fLLLys6OloLFixwdUhAvWC1WhUQEMAMdICbsFgsCggIkMXCcAB1z9/fX76+vq4OAwAAoJzjx49LksLCwlwbCADAJRwOhwoKCiRJAQEB8vHxcXFEAAAAAACU4s4+AJxmwYIFCg8P11VXXaUbb7yxXKJvYmKiDMPQSy+9pFmzZik+Pl7+/v7q2bOnNm7cWO54v/zyi0aNGqXo6GgFBgaqXbt2+vOf/1xuv4yMDI0bN05hYWEKDQ3VHXfcoby8vHL7vfvuu+rRo4cCAwMVERGh0aNH6/DhwzX3DwBqyanljxITE10dCuqxpKQk3XnnnYqJiZG/v7/i4uJ0zz33qKioSJJ04MABjRw5UhEREQoKCtLFF1+sTz/9tMwxVq5cKcMwtHjxYk2dOlVNmzZVgwYNdOONNyozM1OFhYV66KGH1LBhQwUHB+uOO+5QYWFhmWMYhqH7779fCxYsULt27RQQEKAePXpo9erVZfY7ePCg7r33XrVr106BgYGKjIzUyJEjK6wn27Zt08CBAxUYGKhmzZrp73//u+bMmVOuXsXGxurqq6/Wd999p169eikgIECtWrXSf/7znwq/58qVK8/9Hw5U4LvvvlPPnj0VEBCg+Ph4vfnmm+X2iY2N1bhx4+o+OOA8zZgxQ506dZK/v79iYmJ03333lZl5a9CgQfr000918OBBGYYhwzAUGxurlStXqmfPnpKkO+64w/ne3LlzJUlr1qzRyJEj1aJFC/n7+6t58+Z6+OGHlZ+f74JvCW90tvHonDlzdOmll6phw4by9/dXx44dNXPmzHLHqWo/5NTYYO3atXrkkUcUHR0tm82mESNGKCUlpdxxz1b3gJoybtw4DRw4UJI0cuRIGYahQYMGadu2bRo3bpxatWqlgIAANW7cWOPHj1daWprzb6dMmaLHHntMkhQXF+ds609dR8rNzdW8efOc20/vC23evFlXXHGFQkJCFBwcrMsuu0zr16+v0+8OVNW4ceMUGxtbbvuUKVPKPNx9alz88ccfq3PnzvL391enTp30+eef12G0QKmztdGnruGc6m+cKqeGYWjKlCnljpeamqpRo0YpJCREkZGRevDBB53JwdJv9xBO9fdPd/oxzxSXJJWUlOhvf/ub8x5EbGysnnrqqXLXoQBXqc61TcBTVbXvI1W9/0PdAQAA54pHUQHgNAsWLND1118vPz8/3XzzzZo5c6Y2btzovDF/ysKFC5Wdna277rpLhmHohRde0PXXX68DBw44Z6jbtm2b+vfvL19fX02cOFGxsbHav3+/li1bpn/84x9ljjdq1CjFxcVp2rRp+umnn/T222+rYcOGev755537/OMf/9Bf/vIXjRo1ShMmTFBKSoqmT5+uAQMGaPPmzcw2A8CrJScnq1evXsrIyNDEiRPVvn17JSUlacmSJcrLy9OJEyfUp08f5eXladKkSYqMjNS8efN07bXXasmSJRoxYkSZ402bNk2BgYF64okntG/fPk2fPl2+vr6yWCw6ceKEpkyZovXr12vu3LmKi4vTM888U+bvV61apUWLFmnSpEny9/fXjBkz9Ic//EEbNmxQ586dJUkbN27U999/r9GjR6tZs2ZKTEzUzJkzNWjQIO3cuVNBQUGSShOYBw8eLMMw9OSTT8pms+ntt9+Wv79/hf+Lffv26cYbb9Sdd96p22+/XbNnz9a4cePUo0cPderUqRb++0Cp7du3a9iwYYqOjtaUKVNUUlKiZ599Vo0aNXJ1aMB5mzJliqZOnaohQ4bonnvu0e7du51jhbVr18rX11d//vOflZmZqSNHju
iVV16RJAUHB6tDhw7661//qmeeeUYTJ05U//79JUl9+vSRJH3wwQfKy8vTPffco8jISG3YsEHTp0/XkSNH9MEHH7jsO8N7nG08OnPmTHXq1EnXXnutfHx8tGzZMt17771yOBy67777yhyrOv2QBx54QOHh4Xr22WeVmJioV199Vffff78WLVrk3KcqdQ+oKXfddZeaNm2q5557TpMmTVLPnj3VqFEjffXVVzpw4IDuuOMONW7cWDt27NCsWbO0Y8cOrV+/XoZh6Prrr9eePXv03nvv6ZVXXlFUVJQkKTo6WvPnz9eECRPUq1cvTZw4UZIUHx8vSdqxY4f69++vkJAQ/elPf5Kvr6/efPNNDRo0SKtWrVLv3r1d9v8Aztd3332njz76SPfee68aNGig1157TTfccIMOHTqkyMhIV4cHL3KmNlqSvv32Wy1evFj333+/oqKiKkzoOt2oUaMUGxuradOmaf369Xrttdd04sSJcg83nW9cEyZM0Lx583TjjTfq0Ucf1Q8//KBp06Zp165dWrp0aTX/C0DNq+q1TcCbVKX/Q90BAADnzKyCzMxMU5KZmZlZld0BwK29vzfD/OhA+fbsxx9/NCWZX331lWmapulwOMxmzZqZDz74oHOfhIQEU5IZGRlppqenO7f/97//NSWZy5Ytc24bMGCA2aBBA/PgwYNlPsfhcDh/f/bZZ01J5vjx48vsM2LECDMyMtL5OjEx0bRareY//vGPMvtt377d9PHxKbfdNE1zT0aBOe2nFDOnyH6mfwdQYxwOhzntpxRza2p+uffmzJljSjITEhKc2yqri8CZ/Hg8z3xxc0q57bfddptpsVjMjRs3lnvP4XCYDz30kCnJXLNmjXN7dna2GRcXZ8bGxpp2e2lbuWLFClOS2blzZ7OoqMi5780332wahmFeccUVZY59ySWXmC1btiyzTZIpyfzxxx+d2w4ePGgGBASYI0aMcG7Ly8srF+u6detMSeZ//vMf57YHHnjANAzD3Lx5s3NbWlqaGRERUa5etWzZ0pRkrl692rnt+PHjpr+/v/noo486t536nitWrCj3v5r2U4q5pYJ6DJzy4uYUc+Px8uV3+PDhZkBAQJm+z86dO02r1WqePvRs2bKlefvtt5f52+N5xea0n1LMIzlFJlCXfs0tLXtHc4vLbD+973L8+HHTz8/PHDZsmPN8YZqm+frrr5uSzNmzZzu3XXXVVeXOC6Zpmhs3bjQlmXPmzCn3XkXng2nTppmGYZQbS5imaf43IctcsCejGt8S3u75zSnmpgra7aqORysqo5dffrnZqlWrMtuq2g85Vb+GDBlSZnz88MMPm1ar1czIyHD+bVXrnmma5leHs823dqabQFWsSsoxZ/ycVm77qX7yBx984NxWUR147733ypX3F198sVz//BSbzVau/2Oapf0nPz8/c//+/c5tycnJZoMGDcwBAwaU2//9vRnmUsbRqAOVXVe8/fbbK+zrnDqnnCLJ9PPzM/ft2+fctnXrVlOSOX369HJ//0lClvnunhM1Fj+81/dHc81Xt6aW215ZGy3JtFgs5o4dO8r9jSTz2Wefdb4+Vc6vvfbaMvvde++9piRz69atpmn+dg+hor7/749ZWVxbtmwxJZkTJkwos33y5MmmJPPbb78ts/3YyTF1EmNq1JIvD2ebb/+ur13Va5unzPvlhPnpwaxaixE4kx+P55kvVHBf4dT4tKL7CqZZ9b6PaVa9/1PdurMsMcucv/tEZV8NqDOzdqabXx/OrvL+r25LNb8/mluLEQFA/VCdvFxLrWQPA4AHWrBggRo1aqTBgwdLKl1i5aabbtL7778vu91eZt+bbrpJ4eHhztenZuU6cOCAJCklJUWrV6/W+PHj1aJFizJ/+/ulXCTp7rvvLvO6f//+SktLU1ZWliTpo48+ksPh0KhRo5Samur8ady4sdq0aaMVK1ac57cHAM/lcDj08ccf65prrtFFF11U7n3DMPTZZ5+pV6
9e6tevn3N7cHCwJk6cqMTERO3cubPM39x2221lZojr3bu3TNPU+PHjy+zXu3dvHT58WCUlJWW2X3LJJerRo4fzdYsWLXTdddfpiy++cJ5TAgMDne8XFxcrLS1NrVu3VlhYmH766Sfne59//rkuueQSdevWzbktIiJCt9xyS4X/j44dOzrPS1LpTDDt2rVznqOA2mC32/XFF19o+PDhZfo+HTp00OWXX+7CyIDz9/XXX6uoqEgPPfSQLJbfLqP88Y9/VEhIiD799NPzOv7p54Pc3FylpqaqT58+Mk1TmzdvPq9jA1VxtvHo6WU0MzNTqampGjhwoA4cOKDMzMwyf1udfsjEiRPLjI/79+8vu92ugwcPSqr9ugdU1el1oKCgQKmpqbr44oslqUy/vbrsdru+/PJLDR8+XK1atXJub9KkicaMGaPvvvvOWQ8BTzRkyBDn7NWS1KVLF4WEhDA2hdsZOHCgOnbsWOX9f7+iwQMPPCBJ+uyzz2osplPHeuSRR8psf/TRRyWJfhDcQlWvbQLepCr9H+oOAAA4VyT6AoBKb668//77Gjx4sBISErRv3z7t27dPvXv31rFjx/TNN9+U2f/3ybunkn5PnDgh6beE31PLs5/N2Y63d+9emaapNm3aKDo6uszPrl27dPz48Wp+YwCoP1JSUpSVlXXGNvfgwYNq165due0dOnRwvn+637fLoaGhkqTmzZuX2+5wOMolubRp06bcZ7Vt21Z5eXlKSUmRJOXn5+uZZ55R8+bN5e/vr6ioKEVHRysjI6PM8Q4ePKjWrVuXO15F2yqKXSo9r5w6pwC1ISUlRfn5+RWW/YrqHuBJTp0jfl+W/fz81KpVq3LnkOo6dOiQxo0bp4iICAUHBys6OloDBw6UpHLnF6A2nG08unbtWg0ZMkQ2m01hYWGKjo7WU089Jal8Ga1OP+Rsn1vbdQ+oqvT0dD344INq1KiRAgMDFR0drbi4OEnn106npKQoLy+v0nGKw+HQ4cOHz/n4gKsxNoWnONWmV9Xvx73x8fGyWCxKTEyssZgOHjwoi8VS7tpP48aNFRYWRj8IbqGq1zYBb1KV/g91BwAAnCsfVwcAAO7g22+/1dGjR/X+++/r/fffL/f+ggULNGzYMOdrq9Va4XFM0zynzz/b8RwOhwzD0PLlyyvcNzg4+Jw+FwBQscra5Zps/x944AHNmTNHDz30kC655BKFhobKMAyNHj1aDoej2serjRgBALXLbrdr6NChSk9P1+OPP6727dvLZrMpKSlJ48aNO6/zAVBVZ+o77N+/X5dddpnat2+vl19+Wc2bN5efn58+++wzvfLKK+XKaHX6IfRZ4ClGjRql77//Xo899pi6deum4OBgORwO/eEPf6CdhtepaKUySeVWQ5No5+E5Tp9Z8Vz8vl5Up55U99iAO6mta5uAO6lum16V/g91BwAAnCsSfQFApYm8DRs21L///e9y73300UdaunSp3njjjSof79SSiz///HONxBcfHy/TNBUXF6e2bdvWyDEBoL6Ijo5WSEjIGdvcli1bavfu3eW2//LLL873a9LevXvLbduzZ4+CgoIUHR0tSVqyZIluv/12/d///Z9zn4KCAmVkZJT5u5YtW2rfvn3ljlfRNsBVoqOjFRgYWGHZr6juAZ7k1Dli9+7dZZZWLyoqUkJCgoYMGeLcVtkNoMq2b9++XXv27NG8efN02223Obd/9dVXNRE6cN6WLVumwsJCffLJJ2VmJlqxYkWtf3Z16h5QW06cOKFvvvlGU6dO1TPPPOPcXlGf50zJWBW9Fx0draCgoErHKRaLpdyKIoCrhYeHlxuzSuVXyQHcTU0mzO7du7fMLMD79u2Tw+FQbGyspN9WKfh9XamonlQWV8uWLeVwOLR3717nalSSdOzYMWVkZNT4dSzgXFT12ibgyWqj70PdAQAA58ri6gAAwNXy8/P10Ucf6eqrr9aNN95Y7uf+++9Xdna2PvnkkyofMzo6WgMGDN
Ds2bN16NChMu+dy6wV119/vaxWq6ZOnVru703TVFpaWrWPCQD1hcVi0fDhw7Vs2TL9+OOP5d43TVNXXnmlNmzYoHXr1jm35+bmatasWYqNjVXHjh1rNKZ169bpp59+cr4+fPiw/vvf/2rYsGHOp/qtVmu5Nn369OnlZgO4/PLLtW7dOm3ZssW5LT09XQsWLKjRmIHzYbVadfnll+vjjz8u0/fZtWuXvvjiCxdGBpy/IUOGyM/PT6+99lqZdvudd95RZmamrrrqKuc2m81W4TKLNptNUvmb/afOCacf1zRN/etf/6rJrwCcs4rKaGZmpubMmVPrn12dugfUlorqgCS9+uqr5fatrK0/9V5F54Bhw4bpv//9b5nl3o8dO6aFCxeqX79+CgkJOa/4gZoWHx+vzMxMbdu2zbnt6NGjWrp0qQujAs7uTG10df1+spDp06dLkq644gpJUkhIiKKiorR69eoy+82YMaPKcV155ZWSyp9vXn75ZUmiHwS3UNVrm4Anq42+D3UH9U1eXp5++eUXpaamujoUAKj3mNEXgNf75JNPlJ2drWuvvbbC9y+++GJFR0drwYIF6t27d5WP+9prr6lfv37q3r27Jk6cqLi4OCUmJurTTz8tk6xVFfHx8fr73/+uJ598UomJiRo+fLgaNGighIQELV26VBMnTtTkyZOrdUwAqE+ee+45ffnllxo4cKAmTpyoDh066OjRo/rggw/03Xff6YknntB7772nK664QpMmTVJERITmzZunhIQEffjhh7JYavb5t86dO+vyyy/XpEmT5O/v77yZM3XqVOc+V199tebPn6/Q0FB17NhR69at09dff63IyMgyx/rTn/6kd999V0OHDtUDDzwgm82mt99+Wy1atFB6ejrLOMJtTJ06VZ9//rn69++ve++9VyUlJZo+fbo6depU5mI44Gmio6P15JNPaurUqfrDH/6ga6+9Vrt379aMGTPUs2dP3Xrrrc59e/TooUWLFumRRx5Rz549FRwcrGuuuUbx8fEKCwvTG2+8oQYNGshms6l3795q37694uPjNXnyZCUlJSkkJEQffvihTpw44cJvDPxm2LBh8vPz0zXXXKO77rpLOTk5euutt9SwYUMdPXq0Vj+7OnUPqC0hISEaMGCAXnjhBRUXF6tp06b68ssvlZCQUG7fHj16SJL+/Oc/a/To0fL19dU111wjm82mHj166Ouvv9bLL7+smJgYxcXFqXfv3vr73/+ur776Sv369dO9994rHx8fvfnmmyosLNQLL7xQ118XOKvRo0fr8ccf14gRIzRp0iTl5eVp5syZatu2bZmHXQF3U1kbfS4SEhJ07bXX6g9/+IPWrVund999V2PGjFHXrl2d+0yYMEH//Oc/NWHCBF100UVavXq19uzZU+W4unbtqttvv12zZs1SRkaGBg4cqA0bNmjevHkaPny4Bg8efE6xAzWpqtc2AU8we/Zsff755+W2jx07tsb7PtQd1DcbNmzQ4MGD9eyzz2rKlCmuDgcA6jUSfQF4vQULFiggIEBDhw6t8H2LxaKrrrpKCxYsqNbMuV27dtX69ev1l7/8RTNnzlRBQYFatmypUaNGnVOcTzzxhNq2batXXnnFmSjWvHlzDRs2rNIkZQDwFk2bNtUPP/ygv/zlL1qwYIGysrLUtGlTXXHFFQoKClJYWJi+//57Pf7445o+fboKCgrUpUsXLVu2rFZmQRk4cKAuueQSTZ06VYcOHVLHjh01d+5cdenSxbnPv/71L1mtVi1YsEAFBQXq27evvv76a11++eVljtW8eXOtWLFCkyZN0nPPPafo6Gjdd999stlsmjRpkgICAmo8fuBcdOnSRV988YUeeeQRPfPMM2rWrJmmTp2qo0ePkugLjzdlyhRFR0fr9ddf18MPP6yIiAhNnDhRzz33nHx9fZ373XvvvdqyZYvmzJmjV155RS1bttQ111wjX19fzZs3T08++aTuvvtulZSUaM6cORo3bp
yWLVumSZMmadq0aQoICNCIESN0//33l0kUAFylXbt2WrJkiZ5++mlNnjxZjRs31j333KPo6GiNHz++1j+/qnUPqE0LFy7UAw88oH//+98yTVPDhg3T8uXLFRMTU2a/nj176m9/+5veeOMNff7553I4HEpISJDNZtPLL7+siRMn6umnn1Z+fr5uv/129e7dW506ddKaNWv05JNPatq0aXI4HOrdu7fefffdaj1sDtSVyMhILV26VI888oj+9Kc/KS4uTtOmTdPevXtJ9IVbq6yNPheLFi3SM888oyeeeEI+Pj66//779eKLL5bZ55lnnlFKSoqWLFmixYsX64orrtDy5cvVsGHDKsV16iHvVq1aae7cuVq6dKkaN26sJ598Us8+++w5/x+AmlTVa5uAJ5g5c2aF28eNG1fjfR/qDgAAOFeGWYU15LOyshQaGqrMzEyWCwPg8Rbty5Sf1dCIuPrdnu3NLNSHB7L1QOcI2XxrdqZKoCKmaer5LWm6skWwukSePfHQW+oiatamlHytSMrV5G5Rrg6lUoZh6L777tPrr79eq5/z0EMP6c0331ROTo5zSeHzdaoeX9EiWF2rUI/hnV7akqpBTW26KDqwxo6Zkl+id37J0Ni2oWpqI3ELdedYXonm7M7QuHZhahzkGc9Cf5KYrZxih8a0CXV1KPAQL2xJ1ZCmNnWvwXbbHX19JEeJ2cWa0CHc1aHAA6xOztWOE4W6p1OEq0OplkX7MuVvNTSccTRqWV1fV1yWmK2sYrtuaRNW65+F+m3dr3nacDxfD3bxrlkRj+eXaPYvGbqtbahiGFOjFnx1JEeHsot153n0tf+zO0NRgVZd2aJBDUYGVM2mlHx9m5Srx9z4vkJl/ncwWxmFdt3aNszVocDLvbXrhFo18NVlzYKrtP+/tqepV3SgLmkcVMuRAYBnq05eLplfAAAAgJvLz88v8zotLU3z589Xv379aizJFwAAAAAAAAAAAAAAuB/PmK4GAAAA8GKXXHKJBg0apA4dOujYsWN65513lJWVpb/85S+uDg0AAAAAAAAAAAAAANQiEn0BoJ4yTVdHAACoKVdeeaWWLFmiWbNmyTAMde/eXe+8844GDBhQo5/DqQOuQtkDqs6kow8AXoszAAAAAOBduAwEj0XZBYAaR6IvAK9jtRgqstf/nmWRo/Q7+lhcHAi8isVQletXkcNUEAUU1eRjGCoxpRKHKR+L4epwKlQbCVjPPfecnnvuuRo/7u+dqr8+7vmvhZvwqYW+1G9lj8KHunWqK1Jod7g2kGoocpj08VEtPobhHB/WZ0V2k/MIqsxqMVRoN2WapgwPKjdFdlM2TgKoA6fa0yKHKVsdfF6hgzYcNcPHYqjYYcphmrJ4UZlyjqnd9FoZPF9NjCmKaOvhQj6GIbub31eoTJEHxoz6ycdQlc8FDtNUscOUlbILADWKq4IAvE7TIB8dyS1WXrHn3MyvLtM0tSejSFEBVvlbaepRNwzDUEyQj/ZkFp010TGryK6juSVqauOZI1RPzMkyszezyMWR1E97Tv5fm9p8XRwJ3FmMzUd7M4vkqMGk9j2ZRfK3GIoMsNbYMYGqCPe3KtBqONs/d5df4tDhnBLaaVRLbbTb7qbIbiohu9jZVwTOpmmQjwrspg7nlrg6lCrLKrLraB7jaNSNxkE+MiTtySis9c8qKHHoUHYx/RvUiBibj0pMKSGr2NWh1KndGYXytxqK9GdMjdoRY/NRZpFDx/LOre+UVlCi1AI7/Ri4jKfeVygocehQTjF1B26hqc1XB7KKVVyFZN+ErGKVmFIzyi4A1ChaVQBep1OEvzam5Os/ezLUJTJAIX71KxG22GFqX2aR9mcV64oWwa4OB16mZ8NALU3I1vv7stQ2zE/+1rJPapqmlFXs0Na0Atl8LWof5u+iSOGpogKsimvgq88OZSspt1gNA33EA8Hnz2FKv+aVaFtagdqG+imMG0M4g4uiA/XB/iwt3Jup9uH+CrCeey
W0O6RDOcXacaJQfRoFMjsF6pzFMHRRw0CtOZqnvGKHYhv4yV2fk8sucmhbeoGsFqlzBH0oVF3P6EAtOZCl9/Zlql3Y+bXb7ii32KGf0wtVaDfVLSrA1eHAQ7Ro4KtGgVZ9dCBL3SIDFBlglbtOMGeaUmaRQ9vSCtTA16L24ZwDUPsCfSzqEumvlcl5Si+0K8bmq9o4fWQXObQ9vVBWg/4NakZMkI+a2Xz0SWK2ukYFKCrAWq+vG5UZUzcOZNY81JpWIX6KCrBq0f5MdY0MUIR/1fpOpillFNm1NbVQ4f4WtQ71q/1ggQpEB/p43H2FU/0kQ9IFEYx14XrdogK0Pb1A8/dkqFO4v2y+5S+iOkwptcCurakFambzUZMgUtIAoCYZZhXWFs7KylJoaKgyMzMVEhJSF3EBQK1KLSjRmqN5OpBVpPo4sW/jIB/1iArQBZEM/FD3dmcUasPxfCXnlqiiToafxVB8iK8GxNgUTjIhzkGxw9Sq5Fz9klGknPrYiLtIiK9FHcL91b9JEMmWOKt9mUX64XiejuRU3NZXR2SAVV0i/NWrYaBHLZ2N+sM0Tf2YUqCtaQVKLbC7OpxK+RilN1f7NwlSdCAXyVE9ezML9cOxfCVV0kf3ZFZDim3gqz6Ng5gNEtWSX+LQyuRc7cksUn6Je9eMU+PogTE2HspDnXGYptb+mqcd6YXKKKqdsbePIcWd7N80pH+DGlJod2hVcp52ZxQq183b95rAmBp1Jbe4tO+0N7NIBfaq1y1/q6HWIX4aGBOkED/6MXAdT7uvQD8J7igpt1hrf83TwexiVXYqsPkYahfmr4ExQaw8DABVUJ28XBJ9AXg10zQr7YR6KqshLujBLVRWvyijqEl201Q9XoW6zhiGZKVe4hycb1/KYpTOqAq4C4dpqgqrz7kEfSjUBMbAQMXsDtNtk+ANlfaZKOdwpdrqI9GGo7bV9+tGjKnhKlXtOxkSM03DLXnC+YF+EtxZZdeXuNcEANVXnbxcHv0B4NUMw5APfU2gVlC/UBeshlF6xRiAS9DWo76xGIbbL90InA/abaBiJKAAZ0YfCZ6K60ZA7aDvBE/H+QE4P1xfAgDXYJ50AAAAAAAAAAAAAAAAAAAAwA2R6AsAAAAAAAAAAAAAAAAAAAC4IRJ9AQAAAAAAAAAAAAAAAAAAADdEoi8AAAAAAAAAAAAAAAAAAADghkj0BQAAAAAAAAAAAAAAAAAAANwQib4AAAAAAAAAAAAAAAAAAACAGyLRFwAAAAAAAAAAAAAAAAAAAHBDJPoCAAAAAAAAAAAAAAAAAAAAbohEXwAAAAAAAAAAAAAAAAAAAMANkegLAAAAAAAAAAAAAAAAAAAAuCESfQEAAAAAAAAAAAAAAAAAAAA3RKIvAAAAAAAAAAAAAAAAAAAA4IZI9AUAAAAAAAAAAAAAAAAAAADcEIm+AAAAAAAAAAAAAAAAAAAAgBsi0RcAAAAAAAAAAAAAAAAAAABwQyT6AgAAAAAAAAAAAAAAAAAAAG6IRF8AAAAAAAAAAAAAAAAAAADADZHoCwAAAAAAAAAAAAAAAAAAALghEn0BAAAAAAAAAAAAAAAAAAAAN0SiLwAAAAAAAAAAAAAAAAAAAOCGSPQFAAAAAAAAAAAAAAAAAAAA3BCJvgAAAAAAAAAAAAAAAAAAAIAbItEXAAAAAAAAAAAAAAAAAAAAcEMk+gIAAAAAAAAAAAAAAAAAAABuiERfAAAAAAAAAAAAAAAAAAAAwA2R6AsAVXD8+HFdcsklstlseuGFF+rsc2fNmiWbzaaLLrpIhw8frrPPBU5H+Ye3ow7Am1H+4UlcVV7dGXUJ54r2H56Gc0B51Cfv4+31YNeuXQoKClJcXJy++OILV4eDOuLt5b4itP84V4wB4M0o//BmlH8A8Bwk+gJAFSxevFhZWVlKSUnRn/70J0nSoEGDtHLlSuc+2dnZevjhh9W8eXMFBgYqPj5ef/3rX1VSUuLcZ+7cuerWrZvztd
1u1/jx49WxY0cdOXJEK1eu1KBBg5zvT5w4USdOnJCPj4/+85//1PbXBCpE+Ye3ow7Am1H+4UnOVl5XrlwpwzAUHBys4OBgRUdHa8yYMUpPTy93rL/+9a8yDEPLly8vsz0xMdF5jJCQEEVFRWnw4MGaO3euTNM8Y3zLly9Xr169FBoaqvDwcPXs2VOfffZZmeNmZGSU+Zvf153Y2FgFBgY6P/+iiy7SihUrnO9Tl1BTaP/haTgHcA6A+9cDwzC0ZcsWSaXl2zAMTZ48ucw+w4cP15QpU5yvz1R3Th3zlA4dOigvL08jRozQSy+9dNb/F+oHdy/3tP/wJIwB4M0o//BmlH8A8Bwk+gJAFaSlpalNmzYKCgqq8P3i4mJdfvnl2rx5s7766ivl5ORo8eLFWrJkiW6++eYK/6awsFA33nijfv75Z61Zs0bNmjWrcD8/Pz+1b99eqampNfZ9gOqg/MPbUQfgzSj/8CRnK6+SFBoaqpycHOXk5GjPnj1KTU3V448/XmYf0zQ1Z84cRURE6J133qnwOEeOHFFWVpYOHz6syZMna+rUqbrrrrsq/dz9+/dr5MiReuqpp5Senq6jR4/qpZdeUoMGDar9Pd977z3l5OQoIyNDEyZM0HXXXaeCgoJK96cu4VzQ/sPTcA6oGPXJu7hzPahIeHi4Zs6cWensXedad7p06UKZ9yLuXO5p/+FpGAPAm1H+4c0o/wDgOXxcHQAAeIKSkhJZLJU/G7FgwQLt3r1bBw4cUGhoqCSpR48eWrp0qTp06FDuCbWcnBxdd911Mk1T33zzzVkv7lksljJPxAF1ifIPb0cdgDej/MOTnK28/l54eLiGDx+ud999t8z2b775RklJSVq4cKHGjBmjlJQURUdHV3iMwMBAXXXVVQoPD1e/fv300EMPqWPHjuX227x5sxo1aqThw4dLkqxWqwYOHFj1L1cBi8Wi2267Tffcc48OHTqktm3bnnFf6hKqg/YfnoZzAOcAuHc9qEiLFi3UpUsXPfvss5o9e3a598+17lDmvYs7l3vaf3gaxgDwZpR/eDPKPwB4Dmb0BYCzyMnJ0Zo1axQbG1tm++md1i+++EJXXXWVs3N7Snx8vHr37q0vv/zSuS07O1uXXnqpGjRooOXLl5fp3P5+GYxTWrRooXXr1ikzM7PGvhdQFZR/eDvqALwZ5R+epCrl9fdSU1P10UcfqW/fvmW2v/POO7r66qt1ww03KCYmRvPnzz/r5/fp00cxMTFatWpVhe/36NFDycnJuueee/T5559XuFRwdZWUlGjOnDlq2rSp83tTl1ATaP/haTgHxEqiPnk7d68HlfnrX/+qRYsWaefOneXeq0rdMU2z3LYWLVrowIED2rNnT7Vigedx93JP+w9PwhgA3ozyD29G+QcAz0KiLwCcwfz58xUSEqIjR47oqaeeqnS/1NRUxcTEVPheTEyMUlJSnK+PHTumTZs26Y477pC/v3+V4nj00UdVUFCgsLAwvfrqq9X6DsC5ovzD21EH4M0o//AkVS2vkpSZmamwsDCFhYWpYcOGSkpK0oMPPuh8Pz09XUuXLtXtt98uwzA0duzYSpfu/b2mTZtWevM+Li5Oa9euVU5OjiZMmKDo6GgNHTpUBw4cqPoXPemWW25RWFiYbDabHn30Uf3zn/+Un5/fGf+GuoSqov2Hp+EcwDkAnlEPKhMbG6uJEydWGPe51p1BgwbphhtuULt27TRixIhqxQPP4QnlnvYfnoIxALwZ5R/ejPIPAJ6HRF8AOIOxY8cqLS1NYWFheuONNyrdLyoqSsnJyRW+l5ycXGaZr9atW2vu3Lm6+eab9b///a9KccyZM0d2u13Hjx/XQw89VK3vAJwryj+8HXUA3ozyD09S1fIqSaGhocrIyFBGRoby8/N15513asCAASooKJBUuhRdSEiIrrzySknSbbfdpp07d2r9+vVnjSMpKUkRERE6dOiQgoODnT+HDh2SJHXv3l3z58/XkSNHtG
fPHpmmqVtvvVWS5OvrK0kqLi4uc8zi4mLne6csWLBAGRkZKigo0Lp16/TYY4/p888/P2Ns1CVUFe0/PA3nAM4B8Jx6UJk///nPWrFihdatW1fuvTPVncps3bpVixYt0k8//aSlS5eeNW54Jk8p97T/8ASMAeDNKP/wZpR/APA8JPoCwFmEh4dr6NCh2rZtW6X7DB06VJ999pmysrLKbE9ISNAPP/ygoUOHltk+duxYvfXWWxo1apQ++eSTs8bw888/a9CgQWU6ykBdoPzD21EH4M0o//AkVSmvv+fv76+7775bCQkJ2rFjh6TSJXszMzPVvHlzNW7cWP3795dhGGed0WvdunVKTk7WwIED1aJFC+Xk5Dh/WrRoUW7/+Ph4Pfjgg9q+fbskqXHjxvLz81NCQkKZ/fbv319u6bxTDMPQhRdeqL59++rTTz89Y3zUJVQH7T88DecAzgHwvHpwuqioKD322GN6/PHHz7jf7+tOZXbs2KFWrVrpwgsvPON+8HyeVu5p/+HOGAPAm1H+4c0o/wDgWUj0BYAq8Pf3V1FRUaXv33rrrYqPj9fw4cO1e/du2e12/fTTTxoxYoSuvvpqDR48uNzf3HLLLZo9e7ZGjx6tjz/++IyfX1RUVOXlLYCaRvmHt6MOwJtR/uFJzlZef6+kpERvvfWWgoKC1KpVK23atElbt27VV199pS1btjh/3nzzTS1atEi5ubnljlFQUKDly5fr1ltv1YQJE9SxY8cKP2vNmjWaMWOGc/aLX3/9VW+99Zb69OkjSbJarbr55pv19NNPKzk5WQ6HQ99//73eeecd3XLLLZV+h+3bt2vNmjW64IILzvhdqUuoLtp/eBrOAZWjPnkPd64HZ/Pwww9r7969+u6775zbzlZ3KkOZ9y7uXO5p/+FpGAPAm1H+4c0o/wDgOUj0BYAqsFgscjgclb7v5+enr776ShdccIEuvfRS2Ww23Xjjjbruuuu0aNGiSv9u9OjRmjt3rsaMGaMPP/yw0v3sdrusVut5fQfgXFH+4e2oA/BmlH94krOVV0nKzMx0LqcbFRWlDz74QMuWLVN4eLjeeecdDRo0SAMGDFDjxo2dP+PGjVNwcHCZMt2sWTOFhISoWbNmev755/X000/rzTffrPRzw8PD9cUXX6hHjx6y2Wzq3r27wsPDNW/ePOc+r732mtq3b69LLrlEYWFhuuuuu/TPf/5Tw4cPL3Osm2++2fkdrr32Wt1zzz364x//eMbvTV1CddH+w9NwDqgc9cl7uHM9OBubzaZnnnlGaWlpzm1VqTsVocx7F3cu97T/8DSMAeDNKP/wZpR/APAchmma5tl2ysrKUmhoqDIzMxUSElIXcQGAW5k1a5ZefPFFbdq0qc7bwby8PPXt21e33HKLJk+eXKefDUiUf4A6AG9G+YcncWV5dWfUJZwL2n94Gs4BFaM+eRfqgeRwOHTffffp8OHD+t///ufqcFAHKPcVo/3HuWAMAG9G+Yc3o/wDgGtVJy+XGX0BoApGjhyp1q1bKzY2Vi+99FKdfe5bb72lpk2bKiIiQmPHjq2zzwVOR/mHt6MOwJtR/uFJXFVe3Rl1CeeK9h+ehnNAedQn7+Pt9WDXrl0KDw/X2rVr9cQTT7g6HNQRby/3FaH9x7liDABvRvmHN6P8A4DnYEZfAAAAAAAAAAAAAAAAAAAAoI4woy8AAAAAAAAAAAAAAAAAAADg4Uj0BQAAAAAAAAAAAAAAAAAAANwQib4AAAAAAAAAAAAAAAAAAACAGyLRFwAAAAAAAAAAAAAAAAAAAHBDJPoCAAAAAAAAAAAAAAAAAAAAbohEXwAAAAAAAAAAAAAAAAAAAMANkegLAAAAAAAAAAAAAAAAAAAAuCESfQEAAAAAAAAAAAAAAAAAAAA3RKIvAAAAAAAAAAAAAAAAAAAA4IZI9AUAAAAAAAAAAAAAAAAAAADcEIm+AAAAAAAAAAAAAAAAAAAAgBsi0RcAAAAAAAAAAAAAAA
AAAABwQyT6AgAAAAAAAAAAAAAAAAAAAG6IRF8AAAAAAAAAAAAAAAAAAADADZHoCwAAAAAAAAAAAAAAAAAAALghEn0BAAAAAAAAAAAAAAAAAAAAN0SiLwAAAAAAAAAAAAAAAAAAAOCGSPQFAAAAAAAAAAAAAAAAAAAA3BCJvgAAAAAAAAAAAAAAAAAAAIAbItEXAAAAAAAAAAAAAAAAAAAAcEMk+gIAAAAAAAAAAAAAAAAAAABuiERfAAAAAAAAAAAAAAAAAAAAwA2R6AsAAAAAAAAAAAAAAAAAAAC4IRJ9AQAAAAAAAAAAAAAAAAAAADdEoi8AAAAAAAAAAAAAAAAAAADghkj0BQAAAAAAAAAAAAAAAAAAANwQib4AAAAAAAAAAAAAAAAAAACAG/Kpyk6maUqSsrKyajUYAAAAAAAAAAAAAAAAAAAAoD47lY97Kj/3TKqU6JudnS1Jat68+XmEBQAAAAAAAAAAAAAAAAAAAEAqzc8NDQ094z6GWYV0YIfDoeTkZDVo0ECGYdRYgAAAAAAAAAAAAAAAAAAAAIA3MU1T2dnZiomJkcViOeO+VUr0BQAAAAAAAAAAAAAAAAAAAFC3zpwGDAAAAAAAAAAAAAAAAAAAAMAlSPQFAAAAAAAAAAAAAAAAAAAA3BCJvgAAAAAAAAAAAAAAAAAAAIAbItEXAAAAAAAAAAAAAAAAAAAAcEMk+gIAAAAAAAAAAAAAAAAAAABuiERfAAAAAAAAAAAAAAAAAAAAwA2R6AsAAAAAAAAAAAAAAAAAAAC4of8HOI1oEEvT/60AAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "src_mt_alignments = tagger_bert.align_source_mt([src_tokens], [mt_tokens], 'eng', lang_id)[0]\n", + "draw_aligned_qe(src_tokens, mt_tokens, src_tbd_qe, mt_tbd_qe, src_mt_alignments, title='SRC - MT (BERT)')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} From 6448e1f0605986b88cfabca0d8369d42ada424bc Mon Sep 17 00:00:00 2001 From: Konstantin Chernyshev Date: Sun, 17 Sep 2023 23:17:00 +0200 Subject: [PATCH 23/23] fix: refactor analysis notebook with faster data loading and rate plots --- notebooks/fine-grained-analysis.ipynb | 4132 +++++++++++++++++++++---- 1 file changed, 3609 insertions(+), 523 deletions(-) diff --git a/notebooks/fine-grained-analysis.ipynb b/notebooks/fine-grained-analysis.ipynb index 6236796..d31c693 100644 --- a/notebooks/fine-grained-analysis.ipynb +++ b/notebooks/fine-grained-analysis.ipynb @@ -2,45 +2,63 @@ "cells": [ { "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ "## Imports" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2023-09-08T13:32:41.028504Z", + "start_time": "2023-09-08T13:32:33.050723Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\r\n", - 
"\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip available: \u001B[0m\u001B[31;49m22.3.1\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m23.2.1\u001B[0m\r\n", - "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\r\n" + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" ] } ], "source": [ "!pip install -q seaborn pandas " - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "start_time": "2023-08-27T13:14:02.233295Z" - } - } + ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 40, + "metadata": { + "ExecuteTime": { + "end_time": "2023-09-08T13:32:59.390736Z", + "start_time": "2023-09-08T13:32:41.036979Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [], "source": [ "from pathlib import Path\n", "import ast\n", "from collections import defaultdict\n", + "import math\n", "\n", "from tqdm import tqdm\n", "import matplotlib.pyplot as plt\n", @@ -49,45 +67,60 @@ "import seaborn as sns\n", "from stanza.models.common.doc import Sentence as StanzaSentence, Word as StanzaWord, Token as StanzaToken\n", "from astred import Sentence, AlignedSentences" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-27T13:14:08.022332Z", - "start_time": "2023-08-27T13:14:07.457666Z" - } - } + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2023-09-08T13:32:59.427131Z", + "start_time": 
"2023-09-08T13:32:59.389984Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [], "source": [ "import warnings\n", "warnings.filterwarnings(\"error\")" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ "## Load data" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2023-09-08T13:32:59.476502Z", + "start_time": "2023-09-08T13:32:59.416277Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "data": { - "text/plain": "True" + "text/plain": [ + "True" + ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -96,25 +129,264 @@ "DATASET_FOLDER = Path() / '..' / 'data' / 'processed'\n", "MERGED_FOLDER = DATASET_FOLDER / 'merged'\n", "MERGED_FOLDER.exists()" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-27T13:14:08.053094Z", - "start_time": "2023-08-27T13:14:08.034709Z" - } - } + ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 79, + "metadata": { + "ExecuteTime": { + "end_time": "2023-09-08T13:33:08.861627Z", + "start_time": "2023-09-08T13:33:05.556743Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5160\n" + ] + }, { "data": { - "text/plain": " src_text \\\nunit_id \nflores101-main-ukr-66-pe2-3 The qualities that determine a subculture as d... \nflores101-main-nld-9-pe2-4 A course will normally be from 2-5 days and wi... \nflores101-main-ara-41-pe2-3 For example, one might say that the motor car ... \nflores101-main-ita-3-pe1-3 Most modern research telescopes are enormous f... 
\nflores101-main-ukr-98-pe1-4 Searches at security checkpoints have also bec... \nflores101-main-vie-19-pe1-1 South Africa have defeated the All Blacks (New... \nflores101-main-ara-102-ht-4 Some cruises feature Berlin, Germany in the br... \nflores101-main-ukr-31-ht-2 It is still produced today, but more important... \nflores101-main-ukr-9-pe2-5 Books and magazines dealing with wilderness su... \nflores101-main-tur-54-ht-1 Murray lost the first set in a tie break after... \nflores101-main-tur-53-ht-5 They all ran back from where the accident had ... \nflores101-main-ukr-16-ht-3 An up-bow usually generates a softer sound, wh... \nflores101-main-tur-68-pe1-4 He was initially hospitalised in the James Pag... \nflores101-main-tur-42-pe1-4 The presence of a true “invisible team” (Larso... \nflores101-main-nld-39-pe2-1 After its adoption by Congress on July 4, a ha... \nflores101-main-vie-80-ht-2 Accepted were Aristotle's views on all matters... \nflores101-main-vie-29-ht-1 The satellites, both of which weighed in exces... \nflores101-main-nld-91-ht-1 The use of video recording has led to importan... \nflores101-main-vie-42-pe2-2 Virtual team members often function as the poi... \nflores101-main-ukr-44-ht-2 The definition has geographic variations, wher... \n\n mt_text \\\nunit_id \nflores101-main-ukr-66-pe2-3 Якістю, яка визначає субкультуру як індивідуал... \nflores101-main-nld-9-pe2-4 Een cursus zal normaal van 2-5 dagen zijn en r... \nflores101-main-ara-41-pe2-3 على سبيل المثال، يمكن القول أن السيارة النارية... \nflores101-main-ita-3-pe1-3 I telescopi di ricerca più moderni sono enormi... \nflores101-main-ukr-98-pe1-4 Обшуки на блокпостах безпеки також стали набаг... \nflores101-main-vie-19-pe1-1 Nam Phi đã đánh bại All Blacks (New Zealand) t... \nflores101-main-ara-102-ht-4 NaN \nflores101-main-ukr-31-ht-2 NaN \nflores101-main-ukr-9-pe2-5 Бібліотеки та журнали про виживання в дикій пр... 
\nflores101-main-tur-54-ht-1 NaN \nflores101-main-tur-53-ht-5 NaN \nflores101-main-ukr-16-ht-3 NaN \nflores101-main-tur-68-pe1-4 Başlangıçta Great Yarmouth'daki James Paget Ha... \nflores101-main-tur-42-pe1-4 Gerçek bir “görünmez ekibin” varlığı (Larson v... \nflores101-main-nld-39-pe2-1 Nadat het 4 juli door het Congres werd aangeno... \nflores101-main-vie-80-ht-2 NaN \nflores101-main-vie-29-ht-1 NaN \nflores101-main-nld-91-ht-1 NaN \nflores101-main-vie-42-pe2-2 Các thành viên trong nhóm ảo thường là điểm ti... \nflores101-main-ukr-44-ht-2 NaN \n\n tgt_text \\\nunit_id \nflores101-main-ukr-66-pe2-3 Рисами, які визначають субкультуру як відмінну... \nflores101-main-nld-9-pe2-4 Een cursus duurt normaal gesproken 2-5 dagen z... \nflores101-main-ara-41-pe2-3 على سبيل المثال، قد يقول الفرد أن السيارات تؤد... \nflores101-main-ita-3-pe1-3 I telescopi di ricerca più avanzati sono costi... \nflores101-main-ukr-98-pe1-4 Обшуки на безпекових блокпостах також стали на... \nflores101-main-vie-19-pe1-1 Đội tuyển Nam Phi đã đánh bại đội All Blacks (... \nflores101-main-ara-102-ht-4 تعرض بعض الرحلات البحرية زيارة برلين في ألماني... \nflores101-main-ukr-31-ht-2 Їх виготовляють і зараз, але, що важливіше, ві... \nflores101-main-ukr-9-pe2-5 Книги та журнали про виживання в дикій природі... \nflores101-main-tur-54-ht-1 Her iki adamın setteki her bir servisi kazanma... \nflores101-main-tur-53-ht-5 Hepsi kaza yerinden koşarak gelmişti. \nflores101-main-ukr-16-ht-3 Рух смичком угору зазвичай дає м’якший звук, т... \nflores101-main-tur-68-pe1-4 Sürücü önce Great Yarmouth'daki James Paget Ha... \nflores101-main-tur-42-pe1-4 Gerçek bir \"görünmez ekibin\" varlığı (Larson v... \nflores101-main-nld-39-pe2-1 Nadat het op 4 juli door het Congres werd aang... \nflores101-main-vie-80-ht-2 Các quan điểm của Aristotle về tất cả các lĩnh... \nflores101-main-vie-29-ht-1 Hai vệ tinh đã va chạm trong vũ trụ cách Trái ... \nflores101-main-nld-91-ht-1 Het gebruik van video-opnamen heeft geleid tot... 
\nflores101-main-vie-42-pe2-2 Các thành viên trong đội ngũ ảo thường là đầu ... \nflores101-main-ukr-44-ht-2 Визначення залежить від регіону: для деяких мі... \n\n aligned_edit \\\nunit_id \nflores101-main-ukr-66-pe2-3 REF: якістю , яка визначає субкультуру як *... \nflores101-main-nld-9-pe2-4 REF: een cursus zal normaal van 2-5 d... \nflores101-main-ara-41-pe2-3 REF: على سبيل المثال ، يمكن القول أن السي... \nflores101-main-ita-3-pe1-3 REF: i telescopi di ricerca più moderni sono... \nflores101-main-ukr-98-pe1-4 REF: обшуки на блокпостах безпеки та... \nflores101-main-vie-19-pe1-1 REF: *** ***** nam phi đã đánh bại *** all_bl... \nflores101-main-ara-102-ht-4 NaN \nflores101-main-ukr-31-ht-2 NaN \nflores101-main-ukr-9-pe2-5 REF: бібліотеки та журнали про виживання в ди... \nflores101-main-tur-54-ht-1 NaN \nflores101-main-tur-53-ht-5 NaN \nflores101-main-ukr-16-ht-3 NaN \nflores101-main-tur-68-pe1-4 REF: ****** başlangıçta great yarmouth'daki j... \nflores101-main-tur-42-pe1-4 REF: gerçek bir “görünmez ekibin” varlığı (... \nflores101-main-nld-39-pe2-1 REF: nadat het ** 4 juli door het congres wer... \nflores101-main-vie-80-ht-2 NaN \nflores101-main-vie-29-ht-1 NaN \nflores101-main-nld-91-ht-1 NaN \nflores101-main-vie-42-pe2-2 REF: các thành_viên trong nhóm ảo_thường l... 
\nflores101-main-ukr-44-ht-2 NaN \n\n lang_id \\\nunit_id \nflores101-main-ukr-66-pe2-3 ukr \nflores101-main-nld-9-pe2-4 nld \nflores101-main-ara-41-pe2-3 ara \nflores101-main-ita-3-pe1-3 ita \nflores101-main-ukr-98-pe1-4 ukr \nflores101-main-vie-19-pe1-1 vie \nflores101-main-ara-102-ht-4 ara \nflores101-main-ukr-31-ht-2 ukr \nflores101-main-ukr-9-pe2-5 ukr \nflores101-main-tur-54-ht-1 tur \nflores101-main-tur-53-ht-5 tur \nflores101-main-ukr-16-ht-3 ukr \nflores101-main-tur-68-pe1-4 tur \nflores101-main-tur-42-pe1-4 tur \nflores101-main-nld-39-pe2-1 nld \nflores101-main-vie-80-ht-2 vie \nflores101-main-vie-29-ht-1 vie \nflores101-main-nld-91-ht-1 nld \nflores101-main-vie-42-pe2-2 vie \nflores101-main-ukr-44-ht-2 ukr \n\n src_tokens \\\nunit_id \nflores101-main-ukr-66-pe2-3 ['The', 'qualities', 'that', 'determine', 'a',... \nflores101-main-nld-9-pe2-4 ['A', 'course', 'will', 'normally', 'be', 'fro... \nflores101-main-ara-41-pe2-3 ['For', 'example', ',', 'one', 'might', 'say',... \nflores101-main-ita-3-pe1-3 ['Most', 'modern', 'research', 'telescopes', '... \nflores101-main-ukr-98-pe1-4 ['Searches', 'at', 'security', 'checkpoints', ... \nflores101-main-vie-19-pe1-1 ['South', 'Africa', 'have', 'defeated', 'the',... \nflores101-main-ara-102-ht-4 ['Some', 'cruises', 'feature', 'Berlin', ',', ... \nflores101-main-ukr-31-ht-2 ['It', 'is', 'still', 'produced', 'today', ','... \nflores101-main-ukr-9-pe2-5 ['Books', 'and', 'magazines', 'dealing', 'with... \nflores101-main-tur-54-ht-1 ['Murray', 'lost', 'the', 'first', 'set', 'in'... \nflores101-main-tur-53-ht-5 ['They', 'all', 'ran', 'back', 'from', 'where'... \nflores101-main-ukr-16-ht-3 ['An', 'up', '-', 'bow', 'usually', 'generates... \nflores101-main-tur-68-pe1-4 ['He', 'was', 'initially', 'hospitalised', 'in... \nflores101-main-tur-42-pe1-4 ['The', 'presence', 'of', 'a', 'true', '“', 'i... \nflores101-main-nld-39-pe2-1 ['After', 'its', 'adoption', 'by', 'Congress',... 
\nflores101-main-vie-80-ht-2 ['Accepted', 'were', 'Aristotle', \"'s\", 'views... \nflores101-main-vie-29-ht-1 ['The', 'satellites', ',', 'both', 'of', 'whic... \nflores101-main-nld-91-ht-1 ['The', 'use', 'of', 'video', 'recording', 'ha... \nflores101-main-vie-42-pe2-2 ['Virtual', 'team', 'members', 'often', 'funct... \nflores101-main-ukr-44-ht-2 ['The', 'definition', 'has', 'geographic', 'va... \n\n src_annotations \\\nunit_id \nflores101-main-ukr-66-pe2-3 [{'lemma': 'the', 'upos': 'DET', 'feats': 'Def... \nflores101-main-nld-9-pe2-4 [{'lemma': 'a', 'upos': 'DET', 'feats': 'Defin... \nflores101-main-ara-41-pe2-3 [{'lemma': 'for', 'upos': 'ADP', 'feats': '', ... \nflores101-main-ita-3-pe1-3 [{'lemma': 'most', 'upos': 'ADJ', 'feats': 'De... \nflores101-main-ukr-98-pe1-4 [{'lemma': 'search', 'upos': 'NOUN', 'feats': ... \nflores101-main-vie-19-pe1-1 [{'lemma': 'South', 'upos': 'PROPN', 'feats': ... \nflores101-main-ara-102-ht-4 [{'lemma': 'some', 'upos': 'DET', 'feats': '',... \nflores101-main-ukr-31-ht-2 [{'lemma': 'it', 'upos': 'PRON', 'feats': 'Cas... \nflores101-main-ukr-9-pe2-5 [{'lemma': 'book', 'upos': 'NOUN', 'feats': 'N... \nflores101-main-tur-54-ht-1 [{'lemma': 'Murray', 'upos': 'PROPN', 'feats':... \nflores101-main-tur-53-ht-5 [{'lemma': 'they', 'upos': 'PRON', 'feats': 'C... \nflores101-main-ukr-16-ht-3 [{'lemma': 'a', 'upos': 'DET', 'feats': 'Defin... \nflores101-main-tur-68-pe1-4 [{'lemma': 'he', 'upos': 'PRON', 'feats': 'Cas... \nflores101-main-tur-42-pe1-4 [{'lemma': 'the', 'upos': 'DET', 'feats': 'Def... \nflores101-main-nld-39-pe2-1 [{'lemma': 'after', 'upos': 'ADP', 'feats': ''... \nflores101-main-vie-80-ht-2 [{'lemma': 'accept', 'upos': 'VERB', 'feats': ... \nflores101-main-vie-29-ht-1 [{'lemma': 'the', 'upos': 'DET', 'feats': 'Def... \nflores101-main-nld-91-ht-1 [{'lemma': 'the', 'upos': 'DET', 'feats': 'Def... \nflores101-main-vie-42-pe2-2 [{'lemma': 'virtual', 'upos': 'ADJ', 'feats': ... 
\nflores101-main-ukr-44-ht-2 [{'lemma': 'the', 'upos': 'DET', 'feats': 'Def... \n\n mt_tokens \\\nunit_id \nflores101-main-ukr-66-pe2-3 ['Якістю', ',', 'яка', 'визначає', 'субкультур... \nflores101-main-nld-9-pe2-4 ['Een', 'cursus', 'zal', 'normaal', 'van', '2-... \nflores101-main-ara-41-pe2-3 ['على', 'سبيل', 'المثال', '،', 'يمكن', 'القول'... \nflores101-main-ita-3-pe1-3 ['I', 'telescopi', 'di', 'ricerca', 'più', 'mo... \nflores101-main-ukr-98-pe1-4 ['Обшуки', 'на', 'блокпостах', 'безпеки', 'так... \nflores101-main-vie-19-pe1-1 ['Nam', 'Phi', 'đã', 'đánh', 'bại', 'All Black... \nflores101-main-ara-102-ht-4 NaN \nflores101-main-ukr-31-ht-2 NaN \nflores101-main-ukr-9-pe2-5 ['Бібліотеки', 'та', 'журнали', 'про', 'вижива... \nflores101-main-tur-54-ht-1 NaN \nflores101-main-tur-53-ht-5 NaN \nflores101-main-ukr-16-ht-3 NaN \nflores101-main-tur-68-pe1-4 ['Başlangıçta', 'Great', \"Yarmouth'daki\", 'Jam... \nflores101-main-tur-42-pe1-4 ['Gerçek', 'bir', '“görünmez', 'ekibin”', 'var... \nflores101-main-nld-39-pe2-1 ['Nadat', 'het', '4', 'juli', 'door', 'het', '... \nflores101-main-vie-80-ht-2 NaN \nflores101-main-vie-29-ht-1 NaN \nflores101-main-nld-91-ht-1 NaN \nflores101-main-vie-42-pe2-2 ['Các', 'thành viên', 'trong', 'nhóm', 'ảo thư... \nflores101-main-ukr-44-ht-2 NaN \n\n mt_annotations \\\nunit_id \nflores101-main-ukr-66-pe2-3 [{'lemma': 'якість', 'upos': 'NOUN', 'feats': ... \nflores101-main-nld-9-pe2-4 [{'lemma': 'een', 'upos': 'DET', 'feats': 'Def... \nflores101-main-ara-41-pe2-3 [{'lemma': 'عَلَى', 'upos': 'ADP', 'feats': 'A... \nflores101-main-ita-3-pe1-3 [{'lemma': 'il', 'upos': 'DET', 'feats': 'Defi... \nflores101-main-ukr-98-pe1-4 [{'lemma': 'обшук', 'upos': 'NOUN', 'feats': '... \nflores101-main-vie-19-pe1-1 [{'lemma': 'Nam', 'upos': 'NOUN', 'feats': '',... \nflores101-main-ara-102-ht-4 NaN \nflores101-main-ukr-31-ht-2 NaN \nflores101-main-ukr-9-pe2-5 [{'lemma': 'бібліотека', 'upos': 'NOUN', 'feat... 
\nflores101-main-tur-54-ht-1 NaN \nflores101-main-tur-53-ht-5 NaN \nflores101-main-ukr-16-ht-3 NaN \nflores101-main-tur-68-pe1-4 [{'lemma': 'başlangıç', 'upos': 'NOUN', 'feats... \nflores101-main-tur-42-pe1-4 [{'lemma': 'gerçek', 'upos': 'ADJ', 'feats': '... \nflores101-main-nld-39-pe2-1 [{'lemma': 'nadat', 'upos': 'SCONJ', 'feats': ... \nflores101-main-vie-80-ht-2 NaN \nflores101-main-vie-29-ht-1 NaN \nflores101-main-nld-91-ht-1 NaN \nflores101-main-vie-42-pe2-2 [{'lemma': 'Các', 'upos': 'DET', 'feats': '', ... \nflores101-main-ukr-44-ht-2 NaN \n\n tgt_tokens \\\nunit_id \nflores101-main-ukr-66-pe2-3 ['Рисами', ',', 'які', 'визначають', 'субкульт... \nflores101-main-nld-9-pe2-4 ['Een', 'cursus', 'duurt', 'normaal', 'gesprok... \nflores101-main-ara-41-pe2-3 ['على', 'سبيل', 'المثال', '،', 'قد', 'يقول', '... \nflores101-main-ita-3-pe1-3 ['I', 'telescopi', 'di', 'ricerca', 'più', 'av... \nflores101-main-ukr-98-pe1-4 ['Обшуки', 'на', 'безпекових', 'блокпостах', '... \nflores101-main-vie-19-pe1-1 ['Đội', 'tuyển', 'Nam', 'Phi', 'đã', 'đánh', '... \nflores101-main-ara-102-ht-4 ['تعرض', 'بعض', 'الرحلات', 'البحرية', 'زيارة',... \nflores101-main-ukr-31-ht-2 ['Їх', 'виготовляють', 'і', 'зараз', ',', 'але... \nflores101-main-ukr-9-pe2-5 ['Книги', 'та', 'журнали', 'про', 'виживання',... \nflores101-main-tur-54-ht-1 ['Her', 'iki', 'adamın', 'setteki', 'her', 'bi... \nflores101-main-tur-53-ht-5 ['Hepsi', 'kaza', 'yerinden', 'koşarak', 'gelm... \nflores101-main-ukr-16-ht-3 ['Рух', 'смичком', 'угору', 'зазвичай', 'дає',... \nflores101-main-tur-68-pe1-4 ['Sürücü', 'önce', 'Great', \"Yarmouth'daki\", '... \nflores101-main-tur-42-pe1-4 ['Gerçek', 'bir', '\"görünmez', 'ekibin\"', 'var... \nflores101-main-nld-39-pe2-1 ['Nadat', 'het', 'op', '4', 'juli', 'door', 'h... \nflores101-main-vie-80-ht-2 ['Các', 'quan điểm', 'của', 'Aristotle', 'về',... \nflores101-main-vie-29-ht-1 ['Hai', 'vệ', 'tinh', 'đã', 'va', 'chạm', 'tro... 
\nflores101-main-nld-91-ht-1 ['Het', 'gebruik', 'van', 'video-opnamen', 'he... \nflores101-main-vie-42-pe2-2 ['Các', 'thành viên', 'trong', 'đội ngũ', 'ảo ... \nflores101-main-ukr-44-ht-2 ['Визначення', 'залежить', 'від', 'регіону', '... \n\n ... doc_id time_s time_m time_h \\\nunit_id ... \nflores101-main-ukr-66-pe2-3 ... 66 86.563 1.4427 0.0240 \nflores101-main-nld-9-pe2-4 ... 9 19.836 0.3306 0.0055 \nflores101-main-ara-41-pe2-3 ... 41 52.346 0.8724 0.0145 \nflores101-main-ita-3-pe1-3 ... 3 137.284 2.2881 0.0381 \nflores101-main-ukr-98-pe1-4 ... 98 28.684 0.4781 0.0080 \nflores101-main-vie-19-pe1-1 ... 19 182.751 3.0458 0.0508 \nflores101-main-ara-102-ht-4 ... 102 74.626 1.2438 0.0207 \nflores101-main-ukr-31-ht-2 ... 31 91.587 1.5264 0.0254 \nflores101-main-ukr-9-pe2-5 ... 9 45.923 0.7654 0.0128 \nflores101-main-tur-54-ht-1 ... 54 142.789 2.3798 0.0397 \nflores101-main-tur-53-ht-5 ... 53 26.832 0.4472 0.0075 \nflores101-main-ukr-16-ht-3 ... 16 97.448 1.6241 0.0271 \nflores101-main-tur-68-pe1-4 ... 68 19.456 0.3243 0.0054 \nflores101-main-tur-42-pe1-4 ... 42 30.930 0.5155 0.0086 \nflores101-main-nld-39-pe2-1 ... 39 73.994 1.2332 0.0206 \nflores101-main-vie-80-ht-2 ... 80 42.489 0.7081 0.0118 \nflores101-main-vie-29-ht-1 ... 29 178.737 2.9789 0.0496 \nflores101-main-nld-91-ht-1 ... 91 35.700 0.5950 0.0099 \nflores101-main-vie-42-pe2-2 ... 42 101.728 1.6955 0.0283 \nflores101-main-ukr-44-ht-2 ... 
44 226.385 3.7731 0.0629 \n\n time_per_char time_per_word key_per_char \\\nunit_id \nflores101-main-ukr-66-pe2-3 0.5549 4.1220 1.9872 \nflores101-main-nld-9-pe2-4 0.1681 0.9016 0.6102 \nflores101-main-ara-41-pe2-3 0.5690 3.2716 0.4022 \nflores101-main-ita-3-pe1-3 1.2480 9.8060 1.3182 \nflores101-main-ukr-98-pe1-4 0.2758 1.7928 1.0096 \nflores101-main-vie-19-pe1-1 1.2265 7.3100 1.6174 \nflores101-main-ara-102-ht-4 0.3969 1.9638 1.1330 \nflores101-main-ukr-31-ht-2 0.7386 4.5794 1.5161 \nflores101-main-ukr-9-pe2-5 0.4064 2.7014 2.2478 \nflores101-main-tur-54-ht-1 1.5354 7.1394 2.3226 \nflores101-main-tur-53-ht-5 0.4879 2.6832 0.7273 \nflores101-main-ukr-16-ht-3 1.0592 6.4965 1.3913 \nflores101-main-tur-68-pe1-4 0.2560 1.6213 0.7500 \nflores101-main-tur-42-pe1-4 0.2621 1.4729 0.5339 \nflores101-main-nld-39-pe2-1 0.3474 1.9472 0.0329 \nflores101-main-vie-80-ht-2 0.5311 3.8626 2.6375 \nflores101-main-vie-29-ht-1 1.1606 7.1495 2.6299 \nflores101-main-nld-91-ht-1 0.2364 1.6227 1.2914 \nflores101-main-vie-42-pe2-2 1.0708 6.7819 1.8421 \nflores101-main-ukr-44-ht-2 1.7967 10.7802 2.4762 \n\n words_per_hour words_per_minute \\\nunit_id \nflores101-main-ukr-66-pe2-3 873.3524 14.5559 \nflores101-main-nld-9-pe2-4 3992.7405 66.5457 \nflores101-main-ara-41-pe2-3 1100.3706 18.3395 \nflores101-main-ita-3-pe1-3 367.1222 6.1187 \nflores101-main-ukr-98-pe1-4 2008.0881 33.4681 \nflores101-main-vie-19-pe1-1 492.4734 8.2079 \nflores101-main-ara-102-ht-4 1833.1413 30.5524 \nflores101-main-ukr-31-ht-2 786.1378 13.1023 \nflores101-main-ukr-9-pe2-5 1332.6655 22.2111 \nflores101-main-tur-54-ht-1 504.2405 8.4040 \nflores101-main-tur-53-ht-5 1341.6816 22.3614 \nflores101-main-ukr-16-ht-3 554.1417 9.2357 \nflores101-main-tur-68-pe1-4 2220.3947 37.0066 \nflores101-main-tur-42-pe1-4 2444.2289 40.7371 \nflores101-main-nld-39-pe2-1 1848.7986 30.8133 \nflores101-main-vie-80-ht-2 932.0059 15.5334 \nflores101-main-vie-29-ht-1 503.5331 8.3922 \nflores101-main-nld-91-ht-1 2218.4874 36.9748 
\nflores101-main-vie-42-pe2-2 530.8273 8.8471 \nflores101-main-ukr-44-ht-2 333.9444 5.5657 \n\n per_subject_visit_order \nunit_id \nflores101-main-ukr-66-pe2-3 284 \nflores101-main-nld-9-pe2-4 388 \nflores101-main-ara-41-pe2-3 181 \nflores101-main-ita-3-pe1-3 424 \nflores101-main-ukr-98-pe1-4 421 \nflores101-main-vie-19-pe1-1 74 \nflores101-main-ara-102-ht-4 20 \nflores101-main-ukr-31-ht-2 132 \nflores101-main-ukr-9-pe2-5 385 \nflores101-main-tur-54-ht-1 235 \nflores101-main-tur-53-ht-5 234 \nflores101-main-ukr-16-ht-3 67 \nflores101-main-tur-68-pe1-4 294 \nflores101-main-tur-42-pe1-4 187 \nflores101-main-nld-39-pe2-1 93 \nflores101-main-vie-80-ht-2 429 \nflores101-main-vie-29-ht-1 119 \nflores101-main-nld-91-ht-1 394 \nflores101-main-vie-42-pe2-2 178 \nflores101-main-ukr-44-ht-2 194 \n\n[20 rows x 66 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
src_textmt_texttgt_textaligned_editlang_idsrc_tokenssrc_annotationsmt_tokensmt_annotationstgt_tokens...doc_idtime_stime_mtime_htime_per_chartime_per_wordkey_per_charwords_per_hourwords_per_minuteper_subject_visit_order
unit_id
flores101-main-ukr-66-pe2-3The qualities that determine a subculture as d...Якістю, яка визначає субкультуру як індивідуал...Рисами, які визначають субкультуру як відмінну...REF: якістю , яка визначає субкультуру як *...ukr['The', 'qualities', 'that', 'determine', 'a',...[{'lemma': 'the', 'upos': 'DET', 'feats': 'Def...['Якістю', ',', 'яка', 'визначає', 'субкультур...[{'lemma': 'якість', 'upos': 'NOUN', 'feats': ...['Рисами', ',', 'які', 'визначають', 'субкульт......6686.5631.44270.02400.55494.12201.9872873.352414.5559284
flores101-main-nld-9-pe2-4A course will normally be from 2-5 days and wi...Een cursus zal normaal van 2-5 dagen zijn en r...Een cursus duurt normaal gesproken 2-5 dagen z...REF: een cursus zal normaal van 2-5 d...nld['A', 'course', 'will', 'normally', 'be', 'fro...[{'lemma': 'a', 'upos': 'DET', 'feats': 'Defin...['Een', 'cursus', 'zal', 'normaal', 'van', '2-...[{'lemma': 'een', 'upos': 'DET', 'feats': 'Def...['Een', 'cursus', 'duurt', 'normaal', 'gesprok......919.8360.33060.00550.16810.90160.61023992.740566.5457388
flores101-main-ara-41-pe2-3For example, one might say that the motor car ...على سبيل المثال، يمكن القول أن السيارة النارية...على سبيل المثال، قد يقول الفرد أن السيارات تؤد...REF: على سبيل المثال ، يمكن القول أن السي...ara['For', 'example', ',', 'one', 'might', 'say',...[{'lemma': 'for', 'upos': 'ADP', 'feats': '', ...['على', 'سبيل', 'المثال', '،', 'يمكن', 'القول'...[{'lemma': 'عَلَى', 'upos': 'ADP', 'feats': 'A...['على', 'سبيل', 'المثال', '،', 'قد', 'يقول', '......4152.3460.87240.01450.56903.27160.40221100.370618.3395181
flores101-main-ita-3-pe1-3Most modern research telescopes are enormous f...I telescopi di ricerca più moderni sono enormi...I telescopi di ricerca più avanzati sono costi...REF: i telescopi di ricerca più moderni sono...ita['Most', 'modern', 'research', 'telescopes', '...[{'lemma': 'most', 'upos': 'ADJ', 'feats': 'De...['I', 'telescopi', 'di', 'ricerca', 'più', 'mo...[{'lemma': 'il', 'upos': 'DET', 'feats': 'Defi...['I', 'telescopi', 'di', 'ricerca', 'più', 'av......3137.2842.28810.03811.24809.80601.3182367.12226.1187424
flores101-main-ukr-98-pe1-4Searches at security checkpoints have also bec...Обшуки на блокпостах безпеки також стали набаг...Обшуки на безпекових блокпостах також стали на...REF: обшуки на блокпостах безпеки та...ukr['Searches', 'at', 'security', 'checkpoints', ...[{'lemma': 'search', 'upos': 'NOUN', 'feats': ...['Обшуки', 'на', 'блокпостах', 'безпеки', 'так...[{'lemma': 'обшук', 'upos': 'NOUN', 'feats': '...['Обшуки', 'на', 'безпекових', 'блокпостах', '......9828.6840.47810.00800.27581.79281.00962008.088133.4681421
flores101-main-vie-19-pe1-1South Africa have defeated the All Blacks (New...Nam Phi đã đánh bại All Blacks (New Zealand) t...Đội tuyển Nam Phi đã đánh bại đội All Blacks (...REF: *** ***** nam phi đã đánh bại *** all_bl...vie['South', 'Africa', 'have', 'defeated', 'the',...[{'lemma': 'South', 'upos': 'PROPN', 'feats': ...['Nam', 'Phi', 'đã', 'đánh', 'bại', 'All Black...[{'lemma': 'Nam', 'upos': 'NOUN', 'feats': '',...['Đội', 'tuyển', 'Nam', 'Phi', 'đã', 'đánh', '......19182.7513.04580.05081.22657.31001.6174492.47348.207974
flores101-main-ara-102-ht-4Some cruises feature Berlin, Germany in the br...NaNتعرض بعض الرحلات البحرية زيارة برلين في ألماني...NaNara['Some', 'cruises', 'feature', 'Berlin', ',', ...[{'lemma': 'some', 'upos': 'DET', 'feats': '',...NaNNaN['تعرض', 'بعض', 'الرحلات', 'البحرية', 'زيارة',......10274.6261.24380.02070.39691.96381.13301833.141330.552420
flores101-main-ukr-31-ht-2It is still produced today, but more important...NaNЇх виготовляють і зараз, але, що важливіше, ві...NaNukr['It', 'is', 'still', 'produced', 'today', ','...[{'lemma': 'it', 'upos': 'PRON', 'feats': 'Cas...NaNNaN['Їх', 'виготовляють', 'і', 'зараз', ',', 'але......3191.5871.52640.02540.73864.57941.5161786.137813.1023132
flores101-main-ukr-9-pe2-5Books and magazines dealing with wilderness su...Бібліотеки та журнали про виживання в дикій пр...Книги та журнали про виживання в дикій природі...REF: бібліотеки та журнали про виживання в ди...ukr['Books', 'and', 'magazines', 'dealing', 'with...[{'lemma': 'book', 'upos': 'NOUN', 'feats': 'N...['Бібліотеки', 'та', 'журнали', 'про', 'вижива...[{'lemma': 'бібліотека', 'upos': 'NOUN', 'feat...['Книги', 'та', 'журнали', 'про', 'виживання',......945.9230.76540.01280.40642.70142.24781332.665522.2111385
flores101-main-tur-54-ht-1Murray lost the first set in a tie break after...NaNHer iki adamın setteki her bir servisi kazanma...NaNtur['Murray', 'lost', 'the', 'first', 'set', 'in'...[{'lemma': 'Murray', 'upos': 'PROPN', 'feats':...NaNNaN['Her', 'iki', 'adamın', 'setteki', 'her', 'bi......54142.7892.37980.03971.53547.13942.3226504.24058.4040235
flores101-main-tur-53-ht-5They all ran back from where the accident had ...NaNHepsi kaza yerinden koşarak gelmişti.NaNtur['They', 'all', 'ran', 'back', 'from', 'where'...[{'lemma': 'they', 'upos': 'PRON', 'feats': 'C...NaNNaN['Hepsi', 'kaza', 'yerinden', 'koşarak', 'gelm......5326.8320.44720.00750.48792.68320.72731341.681622.3614234
flores101-main-ukr-16-ht-3An up-bow usually generates a softer sound, wh...NaNРух смичком угору зазвичай дає м’якший звук, т...NaNukr['An', 'up', '-', 'bow', 'usually', 'generates...[{'lemma': 'a', 'upos': 'DET', 'feats': 'Defin...NaNNaN['Рух', 'смичком', 'угору', 'зазвичай', 'дає',......1697.4481.62410.02711.05926.49651.3913554.14179.235767
flores101-main-tur-68-pe1-4He was initially hospitalised in the James Pag...Başlangıçta Great Yarmouth'daki James Paget Ha...Sürücü önce Great Yarmouth'daki James Paget Ha...REF: ****** başlangıçta great yarmouth'daki j...tur['He', 'was', 'initially', 'hospitalised', 'in...[{'lemma': 'he', 'upos': 'PRON', 'feats': 'Cas...['Başlangıçta', 'Great', \"Yarmouth'daki\", 'Jam...[{'lemma': 'başlangıç', 'upos': 'NOUN', 'feats...['Sürücü', 'önce', 'Great', \"Yarmouth'daki\", '......6819.4560.32430.00540.25601.62130.75002220.394737.0066294
flores101-main-tur-42-pe1-4The presence of a true “invisible team” (Larso...Gerçek bir “görünmez ekibin” varlığı (Larson v...Gerçek bir \"görünmez ekibin\" varlığı (Larson v...REF: gerçek bir “görünmez ekibin” varlığı (...tur['The', 'presence', 'of', 'a', 'true', '“', 'i...[{'lemma': 'the', 'upos': 'DET', 'feats': 'Def...['Gerçek', 'bir', '“görünmez', 'ekibin”', 'var...[{'lemma': 'gerçek', 'upos': 'ADJ', 'feats': '...['Gerçek', 'bir', '\"görünmez', 'ekibin\"', 'var......4230.9300.51550.00860.26211.47290.53392444.228940.7371187
flores101-main-nld-39-pe2-1After its adoption by Congress on July 4, a ha...Nadat het 4 juli door het Congres werd aangeno...Nadat het op 4 juli door het Congres werd aang...REF: nadat het ** 4 juli door het congres wer...nld['After', 'its', 'adoption', 'by', 'Congress',...[{'lemma': 'after', 'upos': 'ADP', 'feats': ''...['Nadat', 'het', '4', 'juli', 'door', 'het', '...[{'lemma': 'nadat', 'upos': 'SCONJ', 'feats': ...['Nadat', 'het', 'op', '4', 'juli', 'door', 'h......3973.9941.23320.02060.34741.94720.03291848.798630.813393
flores101-main-vie-80-ht-2Accepted were Aristotle's views on all matters...NaNCác quan điểm của Aristotle về tất cả các lĩnh...NaNvie['Accepted', 'were', 'Aristotle', \"'s\", 'views...[{'lemma': 'accept', 'upos': 'VERB', 'feats': ...NaNNaN['Các', 'quan điểm', 'của', 'Aristotle', 'về',......8042.4890.70810.01180.53113.86262.6375932.005915.5334429
flores101-main-vie-29-ht-1The satellites, both of which weighed in exces...NaNHai vệ tinh đã va chạm trong vũ trụ cách Trái ...NaNvie['The', 'satellites', ',', 'both', 'of', 'whic...[{'lemma': 'the', 'upos': 'DET', 'feats': 'Def...NaNNaN['Hai', 'vệ', 'tinh', 'đã', 'va', 'chạm', 'tro......29178.7372.97890.04961.16067.14952.6299503.53318.3922119
flores101-main-nld-91-ht-1The use of video recording has led to importan...NaNHet gebruik van video-opnamen heeft geleid tot...NaNnld['The', 'use', 'of', 'video', 'recording', 'ha...[{'lemma': 'the', 'upos': 'DET', 'feats': 'Def...NaNNaN['Het', 'gebruik', 'van', 'video-opnamen', 'he......9135.7000.59500.00990.23641.62271.29142218.487436.9748394
flores101-main-vie-42-pe2-2Virtual team members often function as the poi...Các thành viên trong nhóm ảo thường là điểm ti...Các thành viên trong đội ngũ ảo thường là đầu ...REF: các thành_viên trong nhóm ảo_thường l...vie['Virtual', 'team', 'members', 'often', 'funct...[{'lemma': 'virtual', 'upos': 'ADJ', 'feats': ...['Các', 'thành viên', 'trong', 'nhóm', 'ảo thư...[{'lemma': 'Các', 'upos': 'DET', 'feats': '', ...['Các', 'thành viên', 'trong', 'đội ngũ', 'ảo ......42101.7281.69550.02831.07086.78191.8421530.82738.8471178
flores101-main-ukr-44-ht-2The definition has geographic variations, wher...NaNВизначення залежить від регіону: для деяких мі...NaNukr['The', 'definition', 'has', 'geographic', 'va...[{'lemma': 'the', 'upos': 'DET', 'feats': 'Def...NaNNaN['Визначення', 'залежить', 'від', 'регіону', '......44226.3853.77310.06291.796710.78022.4762333.94445.5657194
\n

20 rows × 66 columns

\n
" + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
src_textmt_texttgt_textaligned_editlang_idsrc_tokenssrc_annotationsmt_tokensmt_annotationstgt_tokens...doc_idtime_stime_mtime_htime_per_chartime_per_wordkey_per_charwords_per_hourwords_per_minuteper_subject_visit_order
unit_id
flores101-main-nld-48-pe2-2A satellite phone is not generally a replaceme...Een satelliettelefoon is meestal geen vervangi...Een satelliettelefoon is meestal geen goed alt...REF: een satelliettelefoon is meestal geen **...nld['A', 'satellite', 'phone', 'is', 'not', 'gene...[{'lemma': 'a', 'upos': 'DET', 'feats': 'Defin...['Een', 'satelliettelefoon', 'is', 'meestal', ...[{'lemma': 'een', 'upos': 'DET', 'feats': 'Def...['Een', 'satelliettelefoon', 'is', 'meestal', ......4847.2820.78800.01310.29741.52520.57232360.306239.3384212
flores101-main-ita-92-pe1-1Out of 1,400 people polled prior to the 2010 f...Su 1.400 persone intervistate prima delle elez...Su 1.400 persone intervistate prima delle elez...REF: su 1.400 persone intervistate prima dell...ita['Out', 'of', '1,400', 'people', 'polled', 'pr...[{'lemma': 'out', 'upos': 'ADP', 'feats': '', ...['Su', '1.400', 'persone', 'intervistate', 'pr...[{'lemma': 'su', 'upos': 'ADP', 'feats': '', '...['Su', '1.400', 'persone', 'intervistate', 'pr......92139.1882.31980.03870.99425.56751.0071646.607510.7768389
flores101-main-ara-42-pe2-2Virtual team members often function as the poi...غالباً ما يعمل أعضاء الفريق الافتراضي كمكان ال...غالباً ما يعمل أعضاء الفريق الافتراضي كنقطة ات...REF: غالباً ما يعمل أعضاء الفريق الافتراضي كم...ara['Virtual', 'team', 'members', 'often', 'funct...[{'lemma': 'virtual', 'upos': 'ADJ', 'feats': ...['غالباً', 'ما', 'يعمل', 'أعضاء', 'الفريق', 'ا...[{'lemma': 'غَالِب', 'upos': 'ADJ', 'feats': '...['غالباً', 'ما', 'يعمل', 'أعضاء', 'الفريق', 'ا......4257.9930.96660.01610.61053.86620.4632931.146915.5191185
\n", + "

3 rows × 66 columns

\n", + "
" + ], + "text/plain": [ + " src_text \\\n", + "unit_id \n", + "flores101-main-nld-48-pe2-2 A satellite phone is not generally a replaceme... \n", + "flores101-main-ita-92-pe1-1 Out of 1,400 people polled prior to the 2010 f... \n", + "flores101-main-ara-42-pe2-2 Virtual team members often function as the poi... \n", + "\n", + " mt_text \\\n", + "unit_id \n", + "flores101-main-nld-48-pe2-2 Een satelliettelefoon is meestal geen vervangi... \n", + "flores101-main-ita-92-pe1-1 Su 1.400 persone intervistate prima delle elez... \n", + "flores101-main-ara-42-pe2-2 غالباً ما يعمل أعضاء الفريق الافتراضي كمكان ال... \n", + "\n", + " tgt_text \\\n", + "unit_id \n", + "flores101-main-nld-48-pe2-2 Een satelliettelefoon is meestal geen goed alt... \n", + "flores101-main-ita-92-pe1-1 Su 1.400 persone intervistate prima delle elez... \n", + "flores101-main-ara-42-pe2-2 غالباً ما يعمل أعضاء الفريق الافتراضي كنقطة ات... \n", + "\n", + " aligned_edit \\\n", + "unit_id \n", + "flores101-main-nld-48-pe2-2 REF: een satelliettelefoon is meestal geen **... \n", + "flores101-main-ita-92-pe1-1 REF: su 1.400 persone intervistate prima dell... \n", + "flores101-main-ara-42-pe2-2 REF: غالباً ما يعمل أعضاء الفريق الافتراضي كم... \n", + "\n", + " lang_id \\\n", + "unit_id \n", + "flores101-main-nld-48-pe2-2 nld \n", + "flores101-main-ita-92-pe1-1 ita \n", + "flores101-main-ara-42-pe2-2 ara \n", + "\n", + " src_tokens \\\n", + "unit_id \n", + "flores101-main-nld-48-pe2-2 ['A', 'satellite', 'phone', 'is', 'not', 'gene... \n", + "flores101-main-ita-92-pe1-1 ['Out', 'of', '1,400', 'people', 'polled', 'pr... \n", + "flores101-main-ara-42-pe2-2 ['Virtual', 'team', 'members', 'often', 'funct... \n", + "\n", + " src_annotations \\\n", + "unit_id \n", + "flores101-main-nld-48-pe2-2 [{'lemma': 'a', 'upos': 'DET', 'feats': 'Defin... \n", + "flores101-main-ita-92-pe1-1 [{'lemma': 'out', 'upos': 'ADP', 'feats': '', ... \n", + "flores101-main-ara-42-pe2-2 [{'lemma': 'virtual', 'upos': 'ADJ', 'feats': ... 
\n", + "\n", + " mt_tokens \\\n", + "unit_id \n", + "flores101-main-nld-48-pe2-2 ['Een', 'satelliettelefoon', 'is', 'meestal', ... \n", + "flores101-main-ita-92-pe1-1 ['Su', '1.400', 'persone', 'intervistate', 'pr... \n", + "flores101-main-ara-42-pe2-2 ['غالباً', 'ما', 'يعمل', 'أعضاء', 'الفريق', 'ا... \n", + "\n", + " mt_annotations \\\n", + "unit_id \n", + "flores101-main-nld-48-pe2-2 [{'lemma': 'een', 'upos': 'DET', 'feats': 'Def... \n", + "flores101-main-ita-92-pe1-1 [{'lemma': 'su', 'upos': 'ADP', 'feats': '', '... \n", + "flores101-main-ara-42-pe2-2 [{'lemma': 'غَالِب', 'upos': 'ADJ', 'feats': '... \n", + "\n", + " tgt_tokens \\\n", + "unit_id \n", + "flores101-main-nld-48-pe2-2 ['Een', 'satelliettelefoon', 'is', 'meestal', ... \n", + "flores101-main-ita-92-pe1-1 ['Su', '1.400', 'persone', 'intervistate', 'pr... \n", + "flores101-main-ara-42-pe2-2 ['غالباً', 'ما', 'يعمل', 'أعضاء', 'الفريق', 'ا... \n", + "\n", + " ... doc_id time_s time_m time_h \\\n", + "unit_id ... \n", + "flores101-main-nld-48-pe2-2 ... 48 47.282 0.7880 0.0131 \n", + "flores101-main-ita-92-pe1-1 ... 92 139.188 2.3198 0.0387 \n", + "flores101-main-ara-42-pe2-2 ... 
42 57.993 0.9666 0.0161 \n", + "\n", + " time_per_char time_per_word key_per_char \\\n", + "unit_id \n", + "flores101-main-nld-48-pe2-2 0.2974 1.5252 0.5723 \n", + "flores101-main-ita-92-pe1-1 0.9942 5.5675 1.0071 \n", + "flores101-main-ara-42-pe2-2 0.6105 3.8662 0.4632 \n", + "\n", + " words_per_hour words_per_minute \\\n", + "unit_id \n", + "flores101-main-nld-48-pe2-2 2360.3062 39.3384 \n", + "flores101-main-ita-92-pe1-1 646.6075 10.7768 \n", + "flores101-main-ara-42-pe2-2 931.1469 15.5191 \n", + "\n", + " per_subject_visit_order \n", + "unit_id \n", + "flores101-main-nld-48-pe2-2 212 \n", + "flores101-main-ita-92-pe1-1 389 \n", + "flores101-main-ara-42-pe2-2 185 \n", + "\n", + "[3 rows x 66 columns]" + ] }, - "execution_count": 5, + "execution_count": 79, "metadata": {}, "output_type": "execute_result" } @@ -133,55 +405,542 @@ "\n", " df = pd.concat([df, lang_df], ignore_index=False)\n", "\n", - "df.sample(20)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-27T13:17:13.399083Z", - "start_time": "2023-08-27T13:17:11.128478Z" + " \n", + "# filter out not MT samples \n", + "df = df[df['mt_tokens'].notna()]\n", + "\n", + "print(len(df))\n", + "df.sample(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 17.3 s, sys: 414 ms, total: 17.7 s\n", + "Wall time: 18.6 s\n" + ] } - } + ], + "source": [ + "%%time\n", + "# process all saved strings to actual lists\n", + "df['tgt_tokens'] = df['tgt_tokens'].apply(ast.literal_eval)\n", + "df['tgt_annotations'] = df['tgt_annotations'].apply(ast.literal_eval)\n", + "df['mt_tokens'] = df['mt_tokens'].apply(ast.literal_eval)\n", + "df['mt_annotations'] = df['mt_annotations'].apply(ast.literal_eval)\n", + "df['mt_tbd_qe'] = df['mt_tbd_qe'].apply(ast.literal_eval)\n", + "df['mt_wmt22_qe'] = df['mt_wmt22_qe'].apply(ast.literal_eval)\n", + "\n", + "\n", + "def 
process_alignments(mt_pe_tbd_qe_alignments):\n", + " mt_pe_alignments_dict = defaultdict(list)\n", + " for k, v, score in mt_pe_tbd_qe_alignments:\n", + " if k is not None:\n", + " mt_pe_alignments_dict[k].append(v)\n", + " return mt_pe_alignments_dict\n", + "df['mt_pe_tbd_qe_alignments'] = df['mt_pe_tbd_qe_alignments'].apply(ast.literal_eval)\n", + "df['mt_pe_tbd_qe_alignments_dict'] = df['mt_pe_tbd_qe_alignments'].apply(process_alignments)" + ] }, { "cell_type": "markdown", - "source": [], - "metadata": { - "collapsed": false - } + "metadata": {}, + "source": [ + "## Check errors distribution " + ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 248, + "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - " 79%|███████▉ | 4098/5160 [23:06<07:44, 2.29it/s] " + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5160/5160 [00:01<00:00, 2670.68it/s]\n" ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unit_idlang_idmt_tbd_qe_tagsOKBAD-SUBBAD-INSBAD-CONBAD-EXPBAD-SHFBAD-DEL-RBAD-DEL-L
0flores101-main-ukr-100-pe1-1ukr{OK}TrueFalseFalseFalseFalseFalseFalseFalse
1flores101-main-ukr-100-pe1-1ukr{BAD-SUB}FalseTrueFalseFalseFalseFalseFalseFalse
2flores101-main-ukr-100-pe1-1ukr{BAD-SUB, BAD-DEL-R}FalseTrueFalseFalseFalseFalseTrueFalse
3flores101-main-ukr-100-pe1-1ukr{OK, BAD-DEL-L}TrueFalseFalseFalseFalseFalseFalseTrue
4flores101-main-ukr-100-pe1-1ukr{OK}TrueFalseFalseFalseFalseFalseFalseFalse
....................................
108478flores101-main-vie-48-pe1-4vie{OK}TrueFalseFalseFalseFalseFalseFalseFalse
108479flores101-main-vie-48-pe1-4vie{OK}TrueFalseFalseFalseFalseFalseFalseFalse
108480flores101-main-vie-48-pe1-4vie{OK}TrueFalseFalseFalseFalseFalseFalseFalse
108481flores101-main-vie-48-pe1-4vie{OK}TrueFalseFalseFalseFalseFalseFalseFalse
108482flores101-main-vie-48-pe1-4vie{OK}TrueFalseFalseFalseFalseFalseFalseFalse
\n", + "

108483 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " unit_id lang_id mt_tbd_qe_tags OK \\\n", + "0 flores101-main-ukr-100-pe1-1 ukr {OK} True \n", + "1 flores101-main-ukr-100-pe1-1 ukr {BAD-SUB} False \n", + "2 flores101-main-ukr-100-pe1-1 ukr {BAD-SUB, BAD-DEL-R} False \n", + "3 flores101-main-ukr-100-pe1-1 ukr {OK, BAD-DEL-L} True \n", + "4 flores101-main-ukr-100-pe1-1 ukr {OK} True \n", + "... ... ... ... ... \n", + "108478 flores101-main-vie-48-pe1-4 vie {OK} True \n", + "108479 flores101-main-vie-48-pe1-4 vie {OK} True \n", + "108480 flores101-main-vie-48-pe1-4 vie {OK} True \n", + "108481 flores101-main-vie-48-pe1-4 vie {OK} True \n", + "108482 flores101-main-vie-48-pe1-4 vie {OK} True \n", + "\n", + " BAD-SUB BAD-INS BAD-CON BAD-EXP BAD-SHF BAD-DEL-R BAD-DEL-L \n", + "0 False False False False False False False \n", + "1 True False False False False False False \n", + "2 True False False False False True False \n", + "3 False False False False False False True \n", + "4 False False False False False False False \n", + "... ... ... ... ... ... ... ... 
\n", + "108478 False False False False False False False \n", + "108479 False False False False False False False \n", + "108480 False False False False False False False \n", + "108481 False False False False False False False \n", + "108482 False False False False False False False \n", + "\n", + "[108483 rows x 11 columns]" + ] + }, + "execution_count": 248, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ "# process data: read files, read lists to python lists, read alignments \n", "\n", - "df_overlap = pd.DataFrame()\n", - "\n", + "error_types_list = []\n", "for _id, x in tqdm(df.iterrows(), total=len(df)):\n", - " pe_tokens = ast.literal_eval(x['tgt_tokens'])\n", - " mt_tokens = ast.literal_eval(x['mt_tokens'])\n", - " mt_tbd_qe = ast.literal_eval(x['mt_tbd_qe'])\n", - " mt_wmt22_qe = ast.literal_eval(x['mt_wmt22_qe'])[:-1] # as omission rule right\n", + " pe_tokens = x['tgt_tokens']\n", + " mt_tokens = x['mt_tokens']\n", + " mt_tbd_qe = x['mt_tbd_qe']\n", + " mt_wmt22_qe = x['mt_wmt22_qe'][:-1] # as omission rule right\n", + " mt_pe_alignments_dict = x['mt_pe_tbd_qe_alignments_dict']\n", "\n", - " mt_pe_alignments_raw = ast.literal_eval(x['mt_pe_tbd_qe_alignments'])\n", - " mt_pe_alignments_dict = defaultdict(list)\n", + " for i, mt_tok in enumerate(mt_tokens):\n", + " paired_pe_tok_i = mt_pe_alignments_dict[i][0] if mt_pe_alignments_dict[i] else None # SUB have to be paired with one PE token\n", + " if paired_pe_tok_i is None:\n", + " continue\n", "\n", - " for k, v, score in mt_pe_alignments_raw:\n", - " if k is not None:\n", - " mt_pe_alignments_dict[k].append(v)\n", + " tbd_qe_tags = mt_tbd_qe[i]\n", "\n", - " for i, mt_tok in enumerate(mt_tokens):\n", + " error_types_list.append({\n", + " 'unit_id': _id,\n", + " 'lang_id': x['lang_id'],\n", + " 'mt_tbd_qe_tags': tbd_qe_tags,\n", + " **{\n", + " error_name: error_name in tbd_qe_tags\n", + " for error_name in ['OK', 'BAD-SUB', 'BAD-INS', 'BAD-CON', 'BAD-EXP', 'BAD-SHF', 'BAD-DEL-R', 
'BAD-DEL-L']\n", + " },\n", + " })\n", + "\n", + "df_error_types = pd.DataFrame(error_types_list)\n", "\n", + "df_error_types" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "## Check overlap with wmt22" + ] + }, + { + "cell_type": "code", + "execution_count": 249, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5160/5160 [00:01<00:00, 3504.74it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unit_idlang_idmt_tokpe_tokmt_tbd_qemt_wmt22_qe
0flores101-main-ukr-100-pe1-1ukrШокШокOKOK
1flores101-main-ukr-100-pe1-1ukrпривідBAD-SUBBAD
2flores101-main-ukr-100-pe1-1ukrвступіповерненняBAD-SUBBAD
3flores101-main-ukr-100-pe1-1ukrвступіповерненняBAD-DEL-RBAD
4flores101-main-ukr-100-pe1-1ukrнастаєнастаєOKOK
.....................
141186flores101-main-vie-48-pe1-4vienốinốiOKOK
141187flores101-main-vie-48-pe1-4vievớivớiOKOK
141188flores101-main-vie-48-pe1-4viedịch vụdịch vụOKOK
141189flores101-main-vie-48-pe1-4vienàynàyOKOK
141190flores101-main-vie-48-pe1-4vie..OKOK
\n", + "

141191 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " unit_id lang_id mt_tok pe_tok mt_tbd_qe \\\n", + "0 flores101-main-ukr-100-pe1-1 ukr Шок Шок OK \n", + "1 flores101-main-ukr-100-pe1-1 ukr при від BAD-SUB \n", + "2 flores101-main-ukr-100-pe1-1 ukr вступі повернення BAD-SUB \n", + "3 flores101-main-ukr-100-pe1-1 ukr вступі повернення BAD-DEL-R \n", + "4 flores101-main-ukr-100-pe1-1 ukr настає настає OK \n", + "... ... ... ... ... ... \n", + "141186 flores101-main-vie-48-pe1-4 vie nối nối OK \n", + "141187 flores101-main-vie-48-pe1-4 vie với với OK \n", + "141188 flores101-main-vie-48-pe1-4 vie dịch vụ dịch vụ OK \n", + "141189 flores101-main-vie-48-pe1-4 vie này này OK \n", + "141190 flores101-main-vie-48-pe1-4 vie . . OK \n", + "\n", + " mt_wmt22_qe \n", + "0 OK \n", + "1 BAD \n", + "2 BAD \n", + "3 BAD \n", + "4 OK \n", + "... ... \n", + "141186 OK \n", + "141187 OK \n", + "141188 OK \n", + "141189 OK \n", + "141190 OK \n", + "\n", + "[141191 rows x 6 columns]" + ] + }, + "execution_count": 249, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# process data: read files, read lists to python lists, read alignments \n", + "\n", + "overlap_list = []\n", + "for _id, x in tqdm(df.iterrows(), total=len(df)):\n", + " pe_tokens = x['tgt_tokens']\n", + " mt_tokens = x['mt_tokens']\n", + " mt_tbd_qe = x['mt_tbd_qe']\n", + " mt_wmt22_qe = x['mt_wmt22_qe'][:-1] # as omission rule right\n", + " mt_pe_alignments_dict = x['mt_pe_tbd_qe_alignments_dict']\n", + "\n", + " for i, mt_tok in enumerate(mt_tokens):\n", " paired_pe_tok_i = mt_pe_alignments_dict[i][0] if mt_pe_alignments_dict[i] else None # SUB have to be paired with one PE token\n", " if paired_pe_tok_i is None:\n", " continue\n", @@ -189,106 +948,677 @@ " tbd_qe_tags = mt_tbd_qe[i]\n", "\n", " for tbd_qe_tag in tbd_qe_tags:\n", - " _df_tok_stats = pd.DataFrame([{\n", + " overlap_list.append({\n", " 'unit_id': _id,\n", " 'lang_id': x['lang_id'],\n", " 'mt_tok': mt_tok,\n", " 'pe_tok': 
pe_tokens[paired_pe_tok_i],\n", " 'mt_tbd_qe': tbd_qe_tag,\n", " 'mt_wmt22_qe': mt_wmt22_qe[i],\n", - " }])\n", - " df_overlap = pd.concat([df_overlap, _df_tok_stats], ignore_index=True)\n", + " })\n", + "\n", + "df_overlap = pd.DataFrame(overlap_list)\n", "\n", "df_overlap" - ], + ] + }, + { + "cell_type": "code", + "execution_count": 250, "metadata": { - "collapsed": false, "ExecuteTime": { - "end_time": "2023-08-27T13:52:55.913558Z", - "start_time": "2023-08-27T13:17:51.561633Z" + "end_time": "2023-09-08T13:33:27.754556Z", + "start_time": "2023-09-08T13:33:24.589114Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: BAD-DEL and BAD-SHF is overlapping with other cats\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
lang_idaraitanldturukrvie
mt_wmt22_qeBADOKBADOKBADOKBADOKBADOKBADOK
mt_tbd_qe
BAD-CON99368584214151491018923
BAD-DEL-L57618866018662418845617911653191265498
BAD-DEL-R504515566692478635459387116075711331282
BAD-EXP36381771723616387175992868261
BAD-INS0010100031220
BAD-SHF1237109010829551411119513538662593121724862351
BAD-SUB205502366217262221704294121442
OK69014073753173149921589573512030110610296184214551
\n", + "
" + ], + "text/plain": [ + "lang_id ara ita nld tur ukr \\\n", + "mt_wmt22_qe BAD OK BAD OK BAD OK BAD OK BAD OK \n", + "mt_tbd_qe \n", + "BAD-CON 99 3 68 5 84 2 141 5 149 10 \n", + "BAD-DEL-L 576 188 660 186 624 188 456 179 1165 319 \n", + "BAD-DEL-R 504 515 566 692 478 635 459 387 1160 757 \n", + "BAD-EXP 363 8 177 17 236 16 387 17 599 28 \n", + "BAD-INS 0 0 1 0 1 0 0 0 3 1 \n", + "BAD-SHF 1237 1090 1082 955 1411 1195 1353 866 2593 1217 \n", + "BAD-SUB 2055 0 2366 2 1726 2 2217 0 4294 1 \n", + "OK 690 14073 753 17314 992 15895 735 12030 1106 10296 \n", + "\n", + "lang_id vie \n", + "mt_wmt22_qe BAD OK \n", + "mt_tbd_qe \n", + "BAD-CON 189 23 \n", + "BAD-DEL-L 1265 498 \n", + "BAD-DEL-R 1133 1282 \n", + "BAD-EXP 682 61 \n", + "BAD-INS 22 0 \n", + "BAD-SHF 2486 2351 \n", + "BAD-SUB 2144 2 \n", + "OK 1842 14551 " + ] + }, + "execution_count": 250, + "metadata": {}, + "output_type": "execute_result" } - } - }, - { - "cell_type": "markdown", - "source": [ - "## Check overlap with wmt22" ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 9, - "outputs": [], "source": [ "print('Note: BAD-DEL and BAD-SHF is overlapping with other cats')\n", - "pd.crosstab(df_overlap['mt_tbd_qe'], [df_overlap['lang_id'], df_overlap['mt_wmt22_qe']], rownames=['mt_tbd_qe'], colnames=['lang_id', 'mt_wmt22_qe'])" - ], - "metadata": { - "collapsed": false - } + "pd.crosstab(\n", + " df_overlap['mt_tbd_qe'], \n", + " [df_overlap['lang_id'], df_overlap['mt_wmt22_qe']], \n", + " rownames=['mt_tbd_qe'], \n", + " colnames=['lang_id', 'mt_wmt22_qe']\n", + ")" + ] }, { "cell_type": "code", - "execution_count": 10, - "outputs": [], + "execution_count": 289, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unit_idlang_idmt_tokpe_tokmt_tbd_qemt_wmt22_qe
133931flores101-main-vie-25-pe1-2vieOKBAD
121804flores101-main-vie-93-pe2-1viecủacủaOKBAD
60478flores101-main-ita-91-pe2-3itanonnonOKBAD
14086flores101-main-ukr-75-pe2-4ukr18611861OKBAD
10197flores101-main-ukr-28-pe1-3ukrІнтернетуІнтернетуOKBAD
127553flores101-main-vie-50-pe2-3vieđạiđạiOKBAD
38197flores101-main-ara-95-pe1-3araالفناءالفناءOKBAD
89619flores101-main-nld-62-pe2-3nldininOKBAD
100122flores101-main-tur-107-pe2-4turAvrupaAvrupaOKBAD
10221flores101-main-ukr-28-pe1-4ukrстосункистосункиOKBAD
\n", + "
" + ], + "text/plain": [ + " unit_id lang_id mt_tok pe_tok mt_tbd_qe \\\n", + "133931 flores101-main-vie-25-pe1-2 vie và và OK \n", + "121804 flores101-main-vie-93-pe2-1 vie của của OK \n", + "60478 flores101-main-ita-91-pe2-3 ita non non OK \n", + "14086 flores101-main-ukr-75-pe2-4 ukr 1861 1861 OK \n", + "10197 flores101-main-ukr-28-pe1-3 ukr Інтернету Інтернету OK \n", + "127553 flores101-main-vie-50-pe2-3 vie đại đại OK \n", + "38197 flores101-main-ara-95-pe1-3 ara الفناء الفناء OK \n", + "89619 flores101-main-nld-62-pe2-3 nld in in OK \n", + "100122 flores101-main-tur-107-pe2-4 tur Avrupa Avrupa OK \n", + "10221 flores101-main-ukr-28-pe1-4 ukr стосунки стосунки OK \n", + "\n", + " mt_wmt22_qe \n", + "133931 BAD \n", + "121804 BAD \n", + "60478 BAD \n", + "14086 BAD \n", + "10197 BAD \n", + "127553 BAD \n", + "38197 BAD \n", + "89619 BAD \n", + "100122 BAD \n", + "10221 BAD " + ] + }, + "execution_count": 289, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df_overlap[(df_overlap['mt_tbd_qe'] == 'OK') & (df_overlap['mt_wmt22_qe'] == 'BAD')].sample(10)" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", - "execution_count": 11, - "outputs": [], + "execution_count": 87, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unit_idlang_idmt_tokpe_tokmt_tbd_qemt_wmt22_qe
116969flores101-main-vie-46-pe1-2viechính phủchính phủBAD-EXPOK
105196flores101-main-tur-89-pe2-1turbüyürkenbüyürkenBAD-EXPOK
126690flores101-main-vie-41-pe1-1viekhoa họccơ bảnBAD-EXPOK
119938flores101-main-vie-75-pe2-2vieđịa điểmkỳ nghỉBAD-EXPOK
111608flores101-main-tur-85-pe1-2tur..BAD-EXPOK
\n", + "
" + ], + "text/plain": [ + " unit_id lang_id mt_tok pe_tok mt_tbd_qe \\\n", + "116969 flores101-main-vie-46-pe1-2 vie chính phủ chính phủ BAD-EXP \n", + "105196 flores101-main-tur-89-pe2-1 tur büyürken büyürken BAD-EXP \n", + "126690 flores101-main-vie-41-pe1-1 vie khoa học cơ bản BAD-EXP \n", + "119938 flores101-main-vie-75-pe2-2 vie địa điểm kỳ nghỉ BAD-EXP \n", + "111608 flores101-main-tur-85-pe1-2 tur . . BAD-EXP \n", + "\n", + " mt_wmt22_qe \n", + "116969 OK \n", + "105196 OK \n", + "126690 OK \n", + "119938 OK \n", + "111608 OK " + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df_overlap[(df_overlap['mt_tbd_qe'] == 'BAD-EXP') & (df_overlap['mt_wmt22_qe'] == 'OK')].sample(5)" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", - "execution_count": 12, - "outputs": [], - "source": [ - "df_overlap" - ], + "execution_count": 259, "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 13, - "outputs": [], - "source": [ - "pd.crosstab(df_overlap['mt_tbd_qe'], df_overlap['mt_wmt22_qe']).T" + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mt_tbd_qeOKBAD-SUBBAD-INSBAD-CONBAD-EXPBAD-SHFBAD-DEL-RBAD-DEL-L
mt_wmt22_qe
BAD6118148022773024441016243004746
OK841597148147767442681558
\n", + "
" + ], + "text/plain": [ + "mt_tbd_qe OK BAD-SUB BAD-INS BAD-CON BAD-EXP BAD-SHF BAD-DEL-R \\\n", + "mt_wmt22_qe \n", + "BAD 6118 14802 27 730 2444 10162 4300 \n", + "OK 84159 7 1 48 147 7674 4268 \n", + "\n", + "mt_tbd_qe BAD-DEL-L \n", + "mt_wmt22_qe \n", + "BAD 4746 \n", + "OK 1558 " + ] + }, + "execution_count": 259, + "metadata": {}, + "output_type": "execute_result" + } ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 14, - "outputs": [], "source": [ - "df_overlap" - ], - "metadata": { - "collapsed": false - } + "pd.crosstab(\n", + " df_overlap['mt_tbd_qe'], \n", + " df_overlap['mt_wmt22_qe'],\n", + ").T[['OK', 'BAD-SUB', 'BAD-INS', 'BAD-CON', 'BAD-EXP', 'BAD-SHF', 'BAD-DEL-R', 'BAD-DEL-L']]" + ] }, { "cell_type": "code", - "execution_count": 15, - "outputs": [], + "execution_count": 91, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "plt.figure(figsize=(12, 4))\n", "sns.countplot(\n", @@ -297,65 +1627,281 @@ " hue='mt_wmt22_qe',\n", ")\n", "plt.show()" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ "## Analyse BAD-SUB" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", - "execution_count": 162, + "execution_count": 95, + "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 5160/5160 [00:58<00:00, 88.93it/s] \n" + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5160/5160 [00:00<00:00, 8042.61it/s]\n" ] }, { "data": { - "text/plain": " unit_id lang_id mt_tok pe_tok mt_pos \\\n0 flores101-main-ukr-100-pe1-1 ukr при від ADP \n1 flores101-main-ukr-100-pe1-1 ukr вступі повернення NOUN \n2 flores101-main-ukr-100-pe1-1 ukr фази фаза NOUN \n3 flores101-main-ukr-100-pe1-1 ukr бути проходити AUX \n4 flores101-main-ukr-100-pe1-3 ukr Повернувшись Проживши VERB \n... ... ... ... ... ... \n14804 flores101-main-vie-48-pe1-3 vie bằng trên ADP \n14805 flores101-main-vie-48-pe1-3 vie vận chuyển tàu thuyền VERB \n14806 flores101-main-vie-48-pe1-3 vie cuộc đoàn NOUN \n14807 flores101-main-vie-48-pe1-3 vie thoại truyền NOUN \n14808 flores101-main-vie-48-pe1-4 vie điện thoại thông NOUN \n\n pe_pos same_word same_pos same_lemma same_morf same_deprel \n0 ADP False True False False True \n1 NOUN False True False False False \n2 NOUN False True True False False \n3 VERB False False False True False \n4 VERB False True False True True \n... ... ... ... ... ... ... 
\n14804 ADP False True False True True \n14805 NOUN False False False True False \n14806 NOUN False True False True True \n14807 VERB False False False True True \n14808 ADJ False False False True False \n\n[14809 rows x 11 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
unit_idlang_idmt_tokpe_tokmt_pospe_possame_wordsame_possame_lemmasame_morfsame_deprel
0flores101-main-ukr-100-pe1-1ukrпривідADPADPFalseTrueFalseFalseTrue
1flores101-main-ukr-100-pe1-1ukrвступіповерненняNOUNNOUNFalseTrueFalseFalseFalse
2flores101-main-ukr-100-pe1-1ukrфазифазаNOUNNOUNFalseTrueTrueFalseFalse
3flores101-main-ukr-100-pe1-1ukrбутипроходитиAUXVERBFalseFalseFalseTrueFalse
4flores101-main-ukr-100-pe1-3ukrПовернувшисьПрожившиVERBVERBFalseTrueFalseTrueTrue
....................................
14804flores101-main-vie-48-pe1-3viebằngtrênADPADPFalseTrueFalseTrueTrue
14805flores101-main-vie-48-pe1-3vievận chuyểntàu thuyềnVERBNOUNFalseFalseFalseTrueFalse
14806flores101-main-vie-48-pe1-3viecuộcđoànNOUNNOUNFalseTrueFalseTrueTrue
14807flores101-main-vie-48-pe1-3viethoạitruyềnNOUNVERBFalseFalseFalseTrueTrue
14808flores101-main-vie-48-pe1-4vieđiện thoạithôngNOUNADJFalseFalseFalseTrueFalse
\n

14809 rows × 11 columns

\n
" + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unit_idlang_idmt_tokpe_tokmt_pospe_possame_wordsame_possame_lemmasame_morfsame_deprel
0flores101-main-ukr-100-pe1-1ukrпривідADPADPFalseTrueFalseFalseTrue
1flores101-main-ukr-100-pe1-1ukrвступіповерненняNOUNNOUNFalseTrueFalseFalseFalse
2flores101-main-ukr-100-pe1-1ukrфазифазаNOUNNOUNFalseTrueTrueFalseFalse
3flores101-main-ukr-100-pe1-1ukrбутипроходитиAUXVERBFalseFalseFalseTrueFalse
4flores101-main-ukr-100-pe1-3ukrПовернувшисьПрожившиVERBVERBFalseTrueFalseTrueTrue
....................................
14804flores101-main-vie-48-pe1-3viebằngtrênADPADPFalseTrueFalseTrueTrue
14805flores101-main-vie-48-pe1-3vievận chuyểntàu thuyềnVERBNOUNFalseFalseFalseTrueFalse
14806flores101-main-vie-48-pe1-3viecuộcđoànNOUNNOUNFalseTrueFalseTrueTrue
14807flores101-main-vie-48-pe1-3viethoạitruyềnNOUNVERBFalseFalseFalseTrueTrue
14808flores101-main-vie-48-pe1-4vieđiện thoạithôngNOUNADJFalseFalseFalseTrueFalse
\n", + "

14809 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " unit_id lang_id mt_tok pe_tok mt_pos \\\n", + "0 flores101-main-ukr-100-pe1-1 ukr при від ADP \n", + "1 flores101-main-ukr-100-pe1-1 ukr вступі повернення NOUN \n", + "2 flores101-main-ukr-100-pe1-1 ukr фази фаза NOUN \n", + "3 flores101-main-ukr-100-pe1-1 ukr бути проходити AUX \n", + "4 flores101-main-ukr-100-pe1-3 ukr Повернувшись Проживши VERB \n", + "... ... ... ... ... ... \n", + "14804 flores101-main-vie-48-pe1-3 vie bằng trên ADP \n", + "14805 flores101-main-vie-48-pe1-3 vie vận chuyển tàu thuyền VERB \n", + "14806 flores101-main-vie-48-pe1-3 vie cuộc đoàn NOUN \n", + "14807 flores101-main-vie-48-pe1-3 vie thoại truyền NOUN \n", + "14808 flores101-main-vie-48-pe1-4 vie điện thoại thông NOUN \n", + "\n", + " pe_pos same_word same_pos same_lemma same_morf same_deprel \n", + "0 ADP False True False False True \n", + "1 NOUN False True False False False \n", + "2 NOUN False True True False False \n", + "3 VERB False False False True False \n", + "4 VERB False True False True True \n", + "... ... ... ... ... ... ... 
\n", + "14804 ADP False True False True True \n", + "14805 NOUN False False False True False \n", + "14806 NOUN False True False True True \n", + "14807 VERB False False False True True \n", + "14808 ADJ False False False True False \n", + "\n", + "[14809 rows x 11 columns]" + ] }, - "execution_count": 162, + "execution_count": 95, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# collect all BAD-SUB: read files, filter sents with BAD-SUB token, process to python lists, read alignments\n", - "df_stats = pd.DataFrame()\n", "\n", + "stats_list = []\n", "for _id, x in tqdm(df.iterrows(), total=len(df)):\n", - " pe_tokens = ast.literal_eval(x['tgt_tokens'])\n", - " pe_annotations = ast.literal_eval(x['tgt_annotations'])\n", - " mt_annotations = ast.literal_eval(x['mt_annotations'])\n", - " mt_tokens = ast.literal_eval(x['mt_tokens'])\n", - " mt_tbd_qe = ast.literal_eval(x['mt_tbd_qe'])\n", - " mt_pe_alignments_raw = ast.literal_eval(x['mt_pe_tbd_qe_alignments'])\n", - " mt_pe_alignments_dict = defaultdict(list)\n", - "\n", - " for k, v, score in mt_pe_alignments_raw:\n", - " if k is not None:\n", - " mt_pe_alignments_dict[k].append(v)\n", - "\n", + " pe_tokens = x['tgt_tokens']\n", + " pe_annotations = x['tgt_annotations']\n", + " mt_tokens = x['mt_tokens']\n", + " mt_annotations = x['mt_annotations']\n", + " mt_tbd_qe = x['mt_tbd_qe']\n", + " mt_wmt22_qe = x['mt_wmt22_qe'][:-1] # as omission rule right\n", + " mt_pe_alignments_dict = x['mt_pe_tbd_qe_alignments_dict']\n", + " \n", " for i, mt_tok in enumerate(mt_tokens):\n", " if 'BAD-SUB' in mt_tbd_qe[i]:\n", " paired_pe_tok_i = mt_pe_alignments_dict[i][0] if mt_pe_alignments_dict[i] else None # SUB have to be paired with one PE token\n", " if paired_pe_tok_i is None:\n", " continue\n", "\n", - " _df_tok_stats = pd.DataFrame([{\n", + " stats_list.append({\n", " 'unit_id': _id,\n", " 'lang_id': x['lang_id'],\n", " 'mt_tok': mt_tok,\n", @@ -367,41 +1913,194 @@ " 'same_lemma': mt_annotations[i]['lemma'] 
== pe_annotations[paired_pe_tok_i]['lemma'],\n", " 'same_morf': mt_annotations[i]['feats'] == pe_annotations[paired_pe_tok_i]['feats'],\n", " 'same_deprel': mt_annotations[i]['deprel'] == pe_annotations[paired_pe_tok_i]['deprel'],\n", - " }])\n", - " df_stats = pd.concat([df_stats, _df_tok_stats], ignore_index=True)\n", - "\n", + " })\n", "\n", + "df_stats = pd.DataFrame(stats_list)\n", "df_stats = df_stats.astype({'same_word': bool, 'same_pos': bool, 'same_lemma': bool, 'same_morf': bool, 'same_deprel': bool})\n", "\n", "df_stats" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-07-18T21:01:51.313256Z", - "start_time": "2023-07-18T21:00:53.213021Z" - } - } + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ "---" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", - "execution_count": 413, + "execution_count": 97, + "metadata": {}, "outputs": [ { "data": { - "text/plain": " total_sub diff_pos same_pos \\\nunit_id lang_id \nflores101-main-ara-1-pe1-1 ara 3 1 2 \nflores101-main-ara-1-pe1-4 ara 2 2 0 \nflores101-main-ara-1-pe2-2 ara 5 3 2 \nflores101-main-ara-1-pe2-3 ara 1 0 1 \nflores101-main-ara-1-pe2-4 ara 3 2 1 \n... ... ... ... \nflores101-main-vie-99-pe1-4 vie 1 1 0 \nflores101-main-vie-99-pe2-1 vie 3 0 3 \nflores101-main-vie-99-pe2-2 vie 3 1 2 \nflores101-main-vie-99-pe2-3 vie 7 1 6 \nflores101-main-vie-99-pe2-4 vie 4 0 4 \n\n diff_pos_percent \nunit_id lang_id \nflores101-main-ara-1-pe1-1 ara 0.333333 \nflores101-main-ara-1-pe1-4 ara 1.000000 \nflores101-main-ara-1-pe2-2 ara 0.600000 \nflores101-main-ara-1-pe2-3 ara 0.000000 \nflores101-main-ara-1-pe2-4 ara 0.666667 \n... ... 
\nflores101-main-vie-99-pe1-4 vie 1.000000 \nflores101-main-vie-99-pe2-1 vie 0.000000 \nflores101-main-vie-99-pe2-2 vie 0.333333 \nflores101-main-vie-99-pe2-3 vie 0.142857 \nflores101-main-vie-99-pe2-4 vie 0.000000 \n\n[4088 rows x 4 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
total_subdiff_possame_posdiff_pos_percent
unit_idlang_id
flores101-main-ara-1-pe1-1ara3120.333333
flores101-main-ara-1-pe1-4ara2201.000000
flores101-main-ara-1-pe2-2ara5320.600000
flores101-main-ara-1-pe2-3ara1010.000000
flores101-main-ara-1-pe2-4ara3210.666667
..................
flores101-main-vie-99-pe1-4vie1101.000000
flores101-main-vie-99-pe2-1vie3030.000000
flores101-main-vie-99-pe2-2vie3120.333333
flores101-main-vie-99-pe2-3vie7160.142857
flores101-main-vie-99-pe2-4vie4040.000000
\n

4088 rows × 4 columns

\n
" + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
total_subdiff_possame_posdiff_pos_percent
unit_idlang_id
flores101-main-ara-1-pe1-1ara3120.333333
flores101-main-ara-1-pe1-4ara2201.000000
flores101-main-ara-1-pe2-2ara5320.600000
flores101-main-ara-1-pe2-3ara1010.000000
flores101-main-ara-1-pe2-4ara3210.666667
..................
flores101-main-vie-99-pe1-4vie1101.000000
flores101-main-vie-99-pe2-1vie3030.000000
flores101-main-vie-99-pe2-2vie3120.333333
flores101-main-vie-99-pe2-3vie7160.142857
flores101-main-vie-99-pe2-4vie4040.000000
\n", + "

4088 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " total_sub diff_pos same_pos \\\n", + "unit_id lang_id \n", + "flores101-main-ara-1-pe1-1 ara 3 1 2 \n", + "flores101-main-ara-1-pe1-4 ara 2 2 0 \n", + "flores101-main-ara-1-pe2-2 ara 5 3 2 \n", + "flores101-main-ara-1-pe2-3 ara 1 0 1 \n", + "flores101-main-ara-1-pe2-4 ara 3 2 1 \n", + "... ... ... ... \n", + "flores101-main-vie-99-pe1-4 vie 1 1 0 \n", + "flores101-main-vie-99-pe2-1 vie 3 0 3 \n", + "flores101-main-vie-99-pe2-2 vie 3 1 2 \n", + "flores101-main-vie-99-pe2-3 vie 7 1 6 \n", + "flores101-main-vie-99-pe2-4 vie 4 0 4 \n", + "\n", + " diff_pos_percent \n", + "unit_id lang_id \n", + "flores101-main-ara-1-pe1-1 ara 0.333333 \n", + "flores101-main-ara-1-pe1-4 ara 1.000000 \n", + "flores101-main-ara-1-pe2-2 ara 0.600000 \n", + "flores101-main-ara-1-pe2-3 ara 0.000000 \n", + "flores101-main-ara-1-pe2-4 ara 0.666667 \n", + "... ... \n", + "flores101-main-vie-99-pe1-4 vie 1.000000 \n", + "flores101-main-vie-99-pe2-1 vie 0.000000 \n", + "flores101-main-vie-99-pe2-2 vie 0.333333 \n", + "flores101-main-vie-99-pe2-3 vie 0.142857 \n", + "flores101-main-vie-99-pe2-4 vie 0.000000 \n", + "\n", + "[4088 rows x 4 columns]" + ] }, - "execution_count": 413, + "execution_count": 97, "metadata": {}, "output_type": "execute_result" } @@ -415,23 +2114,28 @@ "df_stats_ext_sum = df_stats_ext_sum[(df_stats_ext_sum['total_sub'] < 10) & (df_stats_ext_sum['diff_pos'] < 6)]\n", "df_stats_ext_sum['diff_pos_percent'] = df_stats_ext_sum['diff_pos'] / df_stats_ext_sum['total_sub']\n", "df_stats_ext_sum" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-07-19T10:23:35.738184Z", - "start_time": "2023-07-19T10:23:35.680165Z" - } - } + ] }, { "cell_type": "code", - "execution_count": 414, + "execution_count": 98, + "metadata": { + "ExecuteTime": { + "end_time": "2023-07-19T10:23:59.005619Z", + "start_time": "2023-07-19T10:23:54.861742Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { 
"data": { - "text/plain": "
", - "image/png": "" + "image/png": "", + "text/plain": [ + "
" + ] }, "metadata": {}, "output_type": "display_data" @@ -454,31 +2158,38 @@ "ax.set(xlim=(0, 1))\n", "f.suptitle(\"Percent of diff_pos of all BAD-SUB\", fontsize=12)\n", "plt.show()" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-07-19T10:23:59.005619Z", - "start_time": "2023-07-19T10:23:54.861742Z" - } - } + ] }, { "cell_type": "code", - "execution_count": 416, + "execution_count": 99, + "metadata": { + "ExecuteTime": { + "end_time": "2023-07-19T10:28:23.548919Z", + "start_time": "2023-07-19T10:28:22.673989Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "data": { - "text/plain": "" + "text/plain": [ + "" + ] }, - "execution_count": 416, + "execution_count": 99, "metadata": {}, "output_type": "execute_result" }, { "data": { - "text/plain": "
", - "image/png": "" + "image/png": "", + "text/plain": [ + "
" + ] }, "metadata": {}, "output_type": "display_data" @@ -490,23 +2201,28 @@ " x=\"diff_pos_percent\",\n", " kde=True,\n", ")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-07-19T10:28:23.548919Z", - "start_time": "2023-07-19T10:28:22.673989Z" - } - } + ] }, { "cell_type": "code", "execution_count": 415, + "metadata": { + "ExecuteTime": { + "end_time": "2023-07-19T10:24:00.116236Z", + "start_time": "2023-07-19T10:23:59.016911Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "data": { - "text/plain": "
", - "image/png": "" + "image/png": "", + "text/plain": [ + "
" + ] }, "metadata": {}, "output_type": "display_data" @@ -520,27 +2236,29 @@ " fmt=\".0f\",\n", ").set(title='#total_sub vs #diff_pos in sentences')\n", "plt.show()" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-07-19T10:24:00.116236Z", - "start_time": "2023-07-19T10:23:59.016911Z" - } - } + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ "## Analyse Syntax Trees (using Stanza & Tree Edit Distance)" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 101, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [], "source": [ "# process saved stanza sentences to original format\n", @@ -565,77 +2283,194 @@ "\n", " sentence = StanzaSentence(tokens=tokens)\n", " return sentence" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", - "execution_count": 304, + "execution_count": 102, + "metadata": { + "ExecuteTime": { + "end_time": "2023-07-19T09:36:35.619460Z", + "start_time": "2023-07-19T09:30:26.771815Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - " 0%| | 22/5160 [00:01<06:32, 13.08it/s]19-Jul 11:30:28 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 2%|▏ | 109/5160 [00:06<06:06, 13.76it/s]19-Jul 11:30:33 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 7%|▋ | 386/5160 [00:33<05:47, 13.72it/s]19-Jul 11:31:00 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). 
A tree for this sentence was not created.\n", - "19-Jul 11:31:00 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 9%|▉ | 462/5160 [00:42<04:36, 17.01it/s]19-Jul 11:31:09 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 14%|█▍ | 724/5160 [01:06<14:20, 5.15it/s]19-Jul 11:31:32 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - "19-Jul 11:31:32 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 14%|█▍ | 738/5160 [01:06<04:37, 15.93it/s]19-Jul 11:31:33 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - "19-Jul 11:31:33 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 19%|█▉ | 974/5160 [01:21<02:10, 31.98it/s]19-Jul 11:31:48 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 20%|█▉ | 1015/5160 [01:23<02:57, 23.34it/s]19-Jul 11:31:49 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 22%|██▏ | 1123/5160 [01:26<02:24, 27.99it/s]19-Jul 11:31:53 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 23%|██▎ | 1175/5160 [01:29<02:29, 26.71it/s]19-Jul 11:31:55 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). 
A tree for this sentence was not created.\n", - " 24%|██▍ | 1242/5160 [01:31<02:34, 25.42it/s]19-Jul 11:31:58 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 24%|██▍ | 1260/5160 [01:32<03:43, 17.42it/s]19-Jul 11:31:59 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 27%|██▋ | 1394/5160 [01:39<03:37, 17.31it/s]19-Jul 11:32:06 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - "19-Jul 11:32:06 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 30%|██▉ | 1538/5160 [01:46<02:30, 24.05it/s]19-Jul 11:32:13 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - "19-Jul 11:32:13 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 38%|███▊ | 1935/5160 [02:11<04:04, 13.19it/s]19-Jul 11:32:37 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 41%|████ | 2090/5160 [02:20<02:30, 20.35it/s]19-Jul 11:32:47 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - "19-Jul 11:32:47 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 43%|████▎ | 2228/5160 [02:27<02:17, 21.36it/s]19-Jul 11:32:54 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). 
A tree for this sentence was not created.\n", - "19-Jul 11:32:54 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 49%|████▊ | 2512/5160 [02:46<04:09, 10.62it/s]19-Jul 11:33:12 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - "19-Jul 11:33:12 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 60%|█████▉ | 3092/5160 [03:29<02:34, 13.40it/s]19-Jul 11:33:56 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 65%|██████▌ | 3373/5160 [03:52<02:09, 13.85it/s]19-Jul 11:34:19 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - "19-Jul 11:34:19 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 67%|██████▋ | 3443/5160 [03:57<01:31, 18.77it/s]19-Jul 11:34:24 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 84%|████████▎ | 4309/5160 [04:38<00:50, 16.71it/s]19-Jul 11:35:05 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 84%|████████▍ | 4339/5160 [04:41<01:39, 8.23it/s]19-Jul 11:35:07 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). 
A tree for this sentence was not created.\n", - " 86%|████████▌ | 4427/5160 [04:50<01:11, 10.23it/s]19-Jul 11:35:17 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - "19-Jul 11:35:17 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 87%|████████▋ | 4473/5160 [04:54<00:52, 13.00it/s]19-Jul 11:35:20 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - "19-Jul 11:35:20 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 88%|████████▊ | 4520/5160 [04:58<01:05, 9.77it/s]19-Jul 11:35:25 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 89%|████████▉ | 4613/5160 [05:08<00:47, 11.50it/s]19-Jul 11:35:35 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 91%|█████████ | 4693/5160 [05:16<00:50, 9.33it/s]19-Jul 11:35:42 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - "19-Jul 11:35:42 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 91%|█████████ | 4702/5160 [05:16<00:42, 10.67it/s]19-Jul 11:35:43 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). 
A tree for this sentence was not created.\n", - " 91%|█████████▏| 4721/5160 [05:19<00:46, 9.50it/s]19-Jul 11:35:46 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - "19-Jul 11:35:46 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 92%|█████████▏| 4749/5160 [05:22<00:34, 11.84it/s]19-Jul 11:35:49 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - "19-Jul 11:35:49 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 93%|█████████▎| 4815/5160 [05:28<00:38, 8.94it/s]19-Jul 11:35:55 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 97%|█████████▋| 4985/5160 [05:42<00:14, 11.82it/s]19-Jul 11:36:09 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 97%|█████████▋| 5030/5160 [05:47<00:18, 7.12it/s]19-Jul 11:36:14 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - " 99%|█████████▉| 5133/5160 [05:56<00:02, 9.15it/s]19-Jul 11:36:22 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", - "100%|██████████| 5160/5160 [05:58<00:00, 14.41it/s]\n" + " 0%|▌ | 23/5160 [00:01<05:18, 16.14it/s]17-Sep 19:55:44 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). 
A tree for this sentence was not created.\n", + " 2%|██▋ | 109/5160 [00:09<04:46, 17.65it/s]17-Sep 19:55:52 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 7%|█████████▌ | 386/5160 [00:28<05:09, 15.44it/s]17-Sep 19:56:12 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "17-Sep 19:56:12 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 9%|███████████▎ | 462/5160 [00:34<04:54, 15.98it/s]17-Sep 19:56:17 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 14%|█████████████████▊ | 722/5160 [00:54<09:39, 7.66it/s]17-Sep 19:56:38 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "17-Sep 19:56:38 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 14%|██████████████████▏ | 739/5160 [00:55<04:19, 17.03it/s]17-Sep 19:56:39 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "17-Sep 19:56:39 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 19%|████████████████████████ | 977/5160 [01:07<01:51, 37.50it/s]17-Sep 19:56:51 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). 
A tree for this sentence was not created.\n", + " 20%|████████████████████████▋ | 1013/5160 [01:09<02:30, 27.48it/s]17-Sep 19:56:52 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 22%|███████████████████████████▍ | 1122/5160 [01:12<02:01, 33.31it/s]17-Sep 19:56:55 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 23%|████████████████████████████▋ | 1175/5160 [01:14<03:24, 19.48it/s]17-Sep 19:56:57 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 24%|██████████████████████████████▎ | 1239/5160 [01:16<02:17, 28.43it/s]17-Sep 19:57:00 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 24%|██████████████████████████████▋ | 1258/5160 [01:17<03:02, 21.43it/s]17-Sep 19:57:00 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 27%|██████████████████████████████████ | 1395/5160 [01:23<02:59, 20.96it/s]17-Sep 19:57:07 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "17-Sep 19:57:07 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 30%|█████████████████████████████████████▌ | 1538/5160 [01:30<04:02, 14.91it/s]17-Sep 19:57:13 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). 
A tree for this sentence was not created.\n", + "17-Sep 19:57:13 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 38%|███████████████████████████████████████████████▎ | 1935/5160 [01:54<04:00, 13.43it/s]17-Sep 19:57:38 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 41%|███████████████████████████████████████████████████ | 2090/5160 [02:03<02:29, 20.52it/s]17-Sep 19:57:47 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "17-Sep 19:57:47 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 43%|██████████████████████████████████████████████████████▍ | 2229/5160 [02:11<02:07, 22.99it/s]17-Sep 19:57:54 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "17-Sep 19:57:54 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 49%|█████████████████████████████████████████████████████████████▎ | 2512/5160 [02:30<04:24, 10.02it/s]17-Sep 19:58:13 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "17-Sep 19:58:13 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 60%|███████████████████████████████████████████████████████████████████████████▌ | 3092/5160 [03:15<02:47, 12.33it/s]17-Sep 19:58:58 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). 
A tree for this sentence was not created.\n", + " 65%|██████████████████████████████████████████████████████████████████████████████████▎ | 3373/5160 [03:37<02:06, 14.13it/s]17-Sep 19:59:21 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "17-Sep 19:59:21 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 67%|████████████████████████████████████████████████████████████████████████████████████ | 3443/5160 [03:43<01:33, 18.32it/s]17-Sep 19:59:26 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 84%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 4309/5160 [04:25<00:49, 17.03it/s]17-Sep 20:00:08 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 84%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 4339/5160 [04:27<01:04, 12.75it/s]17-Sep 20:00:10 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 4428/5160 [04:35<01:04, 11.40it/s]17-Sep 20:00:18 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "17-Sep 20:00:18 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). 
A tree for this sentence was not created.\n", + " 87%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 4473/5160 [04:39<00:59, 11.54it/s]17-Sep 20:00:22 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "17-Sep 20:00:22 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 88%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 4521/5160 [04:44<01:20, 7.89it/s]17-Sep 20:00:27 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 89%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 4613/5160 [04:55<00:52, 10.44it/s]17-Sep 20:00:38 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 4693/5160 [05:02<00:41, 11.34it/s]17-Sep 20:00:45 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "17-Sep 20:00:45 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 4701/5160 [05:02<00:44, 10.37it/s]17-Sep 20:00:46 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). 
A tree for this sentence was not created.\n", + " 91%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 4721/5160 [05:05<00:48, 9.06it/s]17-Sep 20:00:48 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "17-Sep 20:00:48 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 4749/5160 [05:08<00:35, 11.45it/s]17-Sep 20:00:52 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "17-Sep 20:00:52 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 4815/5160 [05:16<00:38, 8.89it/s]17-Sep 20:00:59 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 97%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 4985/5160 [05:31<00:17, 9.88it/s]17-Sep 20:01:15 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + " 97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 5030/5160 [05:35<00:15, 8.56it/s]17-Sep 20:01:19 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). 
A tree for this sentence was not created.\n", + " 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎| 5133/5160 [05:44<00:03, 7.01it/s]17-Sep 20:01:27 - [WARNING]: Can only create a tree for a sentence if it has one (and only one) root (Word.is_root). A tree for this sentence was not created.\n", + "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5160/5160 [05:47<00:00, 14.87it/s]\n" ] }, { "data": { - "text/plain": " unit_id lang_id ted\n0 flores101-main-ukr-100-pe1-1 ukr 14\n1 flores101-main-ukr-100-pe1-2 ukr 4\n2 flores101-main-ukr-100-pe1-3 ukr 9\n3 flores101-main-ukr-100-pe1-4 ukr 12\n4 flores101-main-ukr-100-pe1-5 ukr 14\n... ... ... ...\n5155 flores101-main-vie-106-pe2-4 vie 16\n5156 flores101-main-vie-48-pe1-1 vie 11\n5157 flores101-main-vie-48-pe1-2 vie 27\n5158 flores101-main-vie-48-pe1-3 vie 16\n5159 flores101-main-vie-48-pe1-4 vie 10\n\n[5160 rows x 3 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
unit_idlang_idted
0flores101-main-ukr-100-pe1-1ukr14
1flores101-main-ukr-100-pe1-2ukr4
2flores101-main-ukr-100-pe1-3ukr9
3flores101-main-ukr-100-pe1-4ukr12
4flores101-main-ukr-100-pe1-5ukr14
............
5155flores101-main-vie-106-pe2-4vie16
5156flores101-main-vie-48-pe1-1vie11
5157flores101-main-vie-48-pe1-2vie27
5158flores101-main-vie-48-pe1-3vie16
5159flores101-main-vie-48-pe1-4vie10
\n

5160 rows × 3 columns

\n
" + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unit_idlang_idted
0flores101-main-ukr-100-pe1-1ukr14
1flores101-main-ukr-100-pe1-2ukr4
2flores101-main-ukr-100-pe1-3ukr9
3flores101-main-ukr-100-pe1-4ukr12
4flores101-main-ukr-100-pe1-5ukr14
............
5155flores101-main-vie-106-pe2-4vie16
5156flores101-main-vie-48-pe1-1vie11
5157flores101-main-vie-48-pe1-2vie27
5158flores101-main-vie-48-pe1-3vie16
5159flores101-main-vie-48-pe1-4vie10
\n", + "

5160 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " unit_id lang_id ted\n", + "0 flores101-main-ukr-100-pe1-1 ukr 14\n", + "1 flores101-main-ukr-100-pe1-2 ukr 4\n", + "2 flores101-main-ukr-100-pe1-3 ukr 9\n", + "3 flores101-main-ukr-100-pe1-4 ukr 12\n", + "4 flores101-main-ukr-100-pe1-5 ukr 14\n", + "... ... ... ...\n", + "5155 flores101-main-vie-106-pe2-4 vie 16\n", + "5156 flores101-main-vie-48-pe1-1 vie 11\n", + "5157 flores101-main-vie-48-pe1-2 vie 27\n", + "5158 flores101-main-vie-48-pe1-3 vie 16\n", + "5159 flores101-main-vie-48-pe1-4 vie 10\n", + "\n", + "[5160 rows x 3 columns]" + ] }, - "execution_count": 304, + "execution_count": 102, "metadata": {}, "output_type": "execute_result" } @@ -643,8 +2478,6 @@ "source": [ "# use astred to calculate tree edit distance\n", "\n", - "df_synt_scores = pd.DataFrame()\n", - "\n", "langs = {\n", " 'vie': 'vi',\n", " 'tur': 'tr',\n", @@ -654,19 +2487,15 @@ " 'nld': 'nl',\n", "}\n", "\n", - "for _id, x in tqdm(df.iterrows(), total=len(df)):\n", - " pe_tokens = eval(x['tgt_tokens'])\n", - " pe_annotations = eval(x['tgt_annotations'])\n", - " mt_tokens = eval(x['mt_tokens'])\n", - " mt_annotations = eval(x['mt_annotations'])\n", - " mt_tbd_qe = eval(x['mt_tbd_qe'])\n", - " mt_pe_alignments_raw = eval(x['mt_pe_tbd_qe_alignments'])\n", - " mt_pe_alignments_dict = defaultdict(list)\n", - "\n", - " for k, v, score in mt_pe_alignments_raw:\n", - " if k is not None:\n", - " mt_pe_alignments_dict[k].append(v)\n", + "synt_scores_list = []\n", "\n", + "for _id, x in tqdm(df.iterrows(), total=len(df)):\n", + " pe_tokens = x['tgt_tokens']\n", + " pe_annotations = x['tgt_annotations']\n", + " mt_tokens = x['mt_tokens']\n", + " mt_annotations = x['mt_annotations']\n", + " mt_tbd_qe = x['mt_tbd_qe']\n", + " mt_pe_alignments_dict = x['mt_pe_tbd_qe_alignments_dict']\n", " mt_pe_alignments_pairs = [(k, v[0]) for k, v in mt_pe_alignments_dict.items() if len(v) > 0 and v[0] is not None]\n", "\n", " # fix 2 sentences examples with 2 heads to be 1 headed (match to 
first head\n", @@ -720,33 +2549,29 @@ " print('pe_annotations', [i['head'] for i in pe_annotations])\n", " ted = None\n", "\n", - " _df_synt_scores = pd.DataFrame([{\n", + " synt_scores_list.append({\n", " 'unit_id': _id,\n", " 'lang_id': x['lang_id'],\n", " 'ted': int(ted),\n", - " }])\n", - " df_synt_scores = pd.concat([df_synt_scores, _df_synt_scores], ignore_index=True)\n", + " })\n", "\n", + "df_synt_scores = pd.DataFrame(synt_scores_list)\n", "df_synt_scores['ted'] = df_synt_scores['ted'].astype(int)\n", "\n", "df_synt_scores" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-07-19T09:36:35.619460Z", - "start_time": "2023-07-19T09:30:26.771815Z" - } - } + ] }, { "cell_type": "code", - "execution_count": 419, + "execution_count": 116, + "metadata": {}, "outputs": [ { "data": { - "text/plain": "
", - "image/png": "" + "image/png": "", + "text/plain": [ + "
" + ] }, "metadata": {}, "output_type": "display_data" @@ -757,32 +2582,39 @@ " df_synt_scores,\n", " x=\"ted\",\n", " hue=\"lang_id\",\n", - " kde=True,\n", + " # kde=True,\n", + " kind='kde',\n", " # log_scale=(False, 2),\n", " multiple=\"layer\",\n", - " alpha=0.15,\n", + " # alpha=0.1,\n", " # facet_kws={'hist_kws':dict(alpha=0.1)}\n", ")\n", "plt.xlim(-1, 30)\n", "plt.ylim(0, None)\n", + "plt.title('Distribution of Tree Edit Distance (TED)')\n", "plt.show()" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-07-19T10:31:42.618669Z", - "start_time": "2023-07-19T10:31:39.680391Z" - } - } + ] }, { "cell_type": "code", - "execution_count": 372, + "execution_count": 290, + "metadata": { + "ExecuteTime": { + "end_time": "2023-07-19T10:06:31.926535Z", + "start_time": "2023-07-19T10:06:27.991091Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "data": { - "text/plain": "
", - "image/png": "" + "image/png": "", + "text/plain": [ + "
" + ] }, "metadata": {}, "output_type": "display_data" @@ -792,7 +2624,9 @@ "\n", "f, axes = plt.subplots(2, 3, figsize=(9, 6), sharex=True, sharey=True)\n", "\n", - "for ax, lang in zip(axes.flat, df['lang_id'].unique()):\n", + "# langs = df['lang_id'].unique()\n", + "langs = ['ukr', 'ara', 'nld', 'vie', 'ita', 'tur']\n", + "for ax, lang in zip(axes.flat, langs):\n", " #\n", " # # Create a cubehelix colormap to use with kdeplot\n", " # cmap = sns.cubehelix_palette(start=s, light=1, as_cmap=True)\n", @@ -811,47 +2645,305 @@ " ax.set_title(lang)\n", " # ax.set_axis_off()\n", "\n", - "ax.set(xlim=(-1, 30))\n", - "f.suptitle(\"Distribution of Tree Edit Distance (TED)\", fontsize=12)\n", + "ax.set(xlim=(-1, 30))\n", + "f.suptitle(\"Distribution of Tree Edit Distance (TED)\", fontsize=12)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 286, + "metadata": { + "ExecuteTime": { + "end_time": "2023-07-19T09:36:35.620924Z", + "start_time": "2023-07-19T09:36:30.127487Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unit_idlang_idtedmt_tbd_bad_count
3812flores101-main-tur-16-pe2-3tur119
2849flores101-main-nld-101-pe1-3nld5718
4357flores101-main-vie-19-pe1-4vie87
3288flores101-main-nld-53-pe1-1nld108
3750flores101-main-tur-15-pe1-3tur43
\n", + "
" + ], + "text/plain": [ + " unit_id lang_id ted mt_tbd_bad_count\n", + "3812 flores101-main-tur-16-pe2-3 tur 11 9\n", + "2849 flores101-main-nld-101-pe1-3 nld 57 18\n", + "4357 flores101-main-vie-19-pe1-4 vie 8 7\n", + "3288 flores101-main-nld-53-pe1-1 nld 10 8\n", + "3750 flores101-main-tur-15-pe1-3 tur 4 3" + ] + }, + "execution_count": 286, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_synt_scores['mt_tbd_bad_count'] = df['mt_tbd_qe'].apply(lambda x: sum(len(i - {'OK', 'BAD-DEL-R'}) for i in x)).values\n", + "df_synt_scores.sample(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 287, + "metadata": { + "ExecuteTime": { + "end_time": "2023-07-19T09:43:18.169352Z", + "start_time": "2023-07-19T09:43:18.104678Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pearson correlation\n" + ] + }, + { + "data": { + "text/plain": [ + "lang_id \n", + "ara ted 0.539678\n", + "ita ted 0.442620\n", + "nld ted 0.734874\n", + "tur ted 0.726095\n", + "ukr ted 0.776998\n", + "vie ted 0.707215\n", + "Name: mt_tbd_bad_count, dtype: float64" + ] + }, + "execution_count": 287, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print('Pearson correlation')\n", + "df_synt_scores.groupby('lang_id')[['ted', 'mt_tbd_bad_count']].corr(method='pearson').loc[(slice(None),'ted'), 'mt_tbd_bad_count']" + ] + }, + { + "cell_type": "code", + "execution_count": 288, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: BAD count\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print('Note: BAD count')\n", + "\n", + "f, axes = plt.subplots(2, 3, figsize=(9, 6), sharex=True, sharey=True)\n", + "\n", + "# langs = df['lang_id'].unique()\n", + "langs = ['ukr', 'ara', 'nld', 'vie', 'ita', 'tur']\n", + "for ax, lang in zip(axes.flat, langs):\n", + " #\n", + " # # Create a cubehelix colormap to use with kdeplot\n", + " # cmap = sns.cubehelix_palette(start=s, light=1, as_cmap=True)\n", + "\n", + " sns.kdeplot(\n", + " df_synt_scores[df_synt_scores['lang_id'] == lang],\n", + " x=\"ted\",\n", + " y=\"mt_tbd_bad_count\",\n", + " # cmap=None,\n", + " fill=True,\n", + " # clip=(-5, 5),\n", + " # cut=10,\n", + " # thresh=0,\n", + " # levels=15,\n", + " ax=ax,\n", + " )\n", + " ax.set_title(lang)\n", + " # ax.set_axis_off()\n", + "\n", + "ax.set(xlim=(-5, 40), ylim=(-5, 50))\n", + "f.suptitle(\"Correlation of Tree Edit Distance (TED) vs #BAD-X tags\", fontsize=12)\n", "plt.show()" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-07-19T10:06:31.926535Z", - "start_time": "2023-07-19T10:06:27.991091Z" - } - } + ] }, { "cell_type": "code", - "execution_count": 309, + "execution_count": 280, + "metadata": {}, "outputs": [ { "data": { - "text/plain": " unit_id lang_id ted mt_tbd_bad_count\n2864 flores101-main-nld-100-pe2-1 nld 11 4\n433 flores101-main-ukr-5-pe2-2 ukr 2 6\n4835 flores101-main-vie-93-pe1-5 vie 7 2\n4776 flores101-main-vie-69-pe1-4 vie 18 2\n2975 flores101-main-nld-39-pe1-5 nld 0 0", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
unit_idlang_idtedmt_tbd_bad_count
2864flores101-main-nld-100-pe2-1nld114
433flores101-main-ukr-5-pe2-2ukr26
4835flores101-main-vie-93-pe1-5vie72
4776flores101-main-vie-69-pe1-4vie182
2975flores101-main-nld-39-pe1-5nld00
\n
" + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unit_idlang_idtedmt_tbd_bad_count
3702flores101-main-tur-98-pe1-1tur00
77flores101-main-ukr-28-pe2-4ukr3623
2904flores101-main-nld-15-pe1-3nld31
2644flores101-main-nld-28-pe1-1nld00
1855flores101-main-ita-56-pe2-2ita02
\n", + "
" + ], + "text/plain": [ + " unit_id lang_id ted mt_tbd_bad_count\n", + "3702 flores101-main-tur-98-pe1-1 tur 0 0\n", + "77 flores101-main-ukr-28-pe2-4 ukr 36 23\n", + "2904 flores101-main-nld-15-pe1-3 nld 3 1\n", + "2644 flores101-main-nld-28-pe1-1 nld 0 0\n", + "1855 flores101-main-ita-56-pe2-2 ita 0 2" + ] }, - "execution_count": 309, + "execution_count": 280, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df_synt_scores['mt_tbd_bad_count'] = df['mt_tbd_qe'].apply(eval).apply(lambda x: sum(len(i - {'OK', 'BAD-DEL-L', 'BAD-DEL-R', 'BAD-SHF'}) for i in x)).values\n", + "df_synt_scores['mt_tbd_bad_count'] = df['mt_tbd_qe'].apply(lambda x: sum(len(i - {'OK', 'BAD-DEL-L', 'BAD-DEL-R', 'BAD-SHF'}) for i in x)).values\n", "df_synt_scores.sample(5)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-07-19T09:36:35.620924Z", - "start_time": "2023-07-19T09:36:30.127487Z" - } - } + ] }, { "cell_type": "code", - "execution_count": 327, + "execution_count": 281, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -862,9 +2954,18 @@ }, { "data": { - "text/plain": "lang_id \nara ted 0.556922\nita ted 0.434015\nnld ted 0.669098\ntur ted 0.689409\nukr ted 0.710757\nvie ted 0.642947\nName: mt_tbd_bad_count, dtype: float64" + "text/plain": [ + "lang_id \n", + "ara ted 0.556922\n", + "ita ted 0.434015\n", + "nld ted 0.669098\n", + "tur ted 0.689409\n", + "ukr ted 0.710757\n", + "vie ted 0.642947\n", + "Name: mt_tbd_bad_count, dtype: float64" + ] }, - "execution_count": 327, + "execution_count": 281, "metadata": {}, "output_type": "execute_result" } @@ -872,18 +2973,21 @@ "source": [ "print('Pearson correlation')\n", "df_synt_scores.groupby('lang_id')[['ted', 'mt_tbd_bad_count']].corr(method='pearson').loc[(slice(None),'ted'), 'mt_tbd_bad_count']" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-07-19T09:43:18.169352Z", - "start_time": "2023-07-19T09:43:18.104678Z" - } - } + ] }, { "cell_type": 
"code", - "execution_count": 355, + "execution_count": 282, + "metadata": { + "ExecuteTime": { + "end_time": "2023-07-19T10:07:10.961389Z", + "start_time": "2023-07-19T10:07:04.374076Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "name": "stdout", @@ -894,8 +2998,10 @@ }, { "data": { - "text/plain": "
", - "image/png": "" + "image/png": "", + "text/plain": [ + "
" + ] }, "metadata": {}, "output_type": "display_data" @@ -903,45 +3009,12 @@ ], "source": [ "print('Note: BAD count except for BAD-DEL and BAD-SHF')\n", - "sns.jointplot(df_synt_scores, x=\"ted\", y=\"mt_tbd_bad_count\", hue=\"lang_id\", kind=\"kde\", fill=Fl, marginal_kws={'hist_kws':dict(alpha=0.1)})\n", - "plt.xlim(-5, 60)\n", - "plt.ylim(-5, 30)\n", - "plt.show()" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-07-19T09:59:48.479409Z", - "start_time": "2023-07-19T09:59:42.344089Z" - } - } - }, - { - "cell_type": "code", - "execution_count": 374, - "outputs": [ - { - "data": { - "text/plain": "Text(0.5, 0.98, 'Correlation of Tree Edit Distance (TED) vs #BAD-X tags')" - }, - "execution_count": 374, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": "
", - "image/png": "" - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ "\n", "f, axes = plt.subplots(2, 3, figsize=(9, 6), sharex=True, sharey=True)\n", "\n", - "for ax, lang in zip(axes.flat, df['lang_id'].unique()):\n", + "# langs = df['lang_id'].unique()\n", + "langs = ['ukr', 'ara', 'nld', 'vie', 'ita', 'tur']\n", + "for ax, lang in zip(axes.flat, langs):\n", " #\n", " # # Create a cubehelix colormap to use with kdeplot\n", " # cmap = sns.cubehelix_palette(start=s, light=1, as_cmap=True)\n", @@ -961,30 +3034,36 @@ " ax.set_title(lang)\n", " # ax.set_axis_off()\n", "\n", - "ax.set(xlim=(-7, 40), ylim=(-5, 25))\n", - "f.suptitle(\"Correlation of Tree Edit Distance (TED) vs #BAD-X tags\", fontsize=12)\n", + "ax.set(xlim=(-5, 40), ylim=(-5, 25))\n", + "f.suptitle(\"Correlation of Tree Edit Distance (TED) vs #BAD-X tags (\\\\wo BAD-DEL/BAD-SHF)\", fontsize=12)\n", "plt.show()" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-07-19T10:07:10.961389Z", - "start_time": "2023-07-19T10:07:04.374076Z" - } - } + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ "---" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 227, + "metadata": { + "ExecuteTime": { + "end_time": "2023-07-18T17:00:42.468181Z", + "start_time": "2023-07-18T17:00:42.176945Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [], "source": [ "import matplotlib.ticker as ticker\n", @@ -1012,11 +3091,11 @@ " # Draw a dot plot using the stripplot function\n", " g.map(\n", " sns.stripplot,\n", - " size=15,\n", + " size=10,\n", " orient=\"h\",\n", " jitter=False,\n", " palette=\"flare_r\",\n", - " linewidth=2,\n", + " linewidth=1,\n", " edgecolor=\"w\",\n", " )\n", "\n", @@ -1028,8 +3107,28 @@ " for ax, col in zip(g.axes.flat, summary_df.columns):\n", " avg = 
summary_df[col].mean()\n", " ax.axvline(avg, color='r', linestyle='--')\n", + " \n", + " if 'total' not in title:\n", + " continue\n", "\n", - " step = 5000 if summary_df[col].max() > 10000 else 2500 if summary_df[col].max() > 5000 else 1000 if summary_df[col].max() > 2000 else 500 if summary_df[col].max() > 800 else 100 if summary_df[col].max() > 500 else 50\n", + " step = 1\n", + " if summary_df[col].max() > 15000:\n", + " step = 10000\n", + " elif summary_df[col].max() > 10000:\n", + " step = 5000\n", + " elif summary_df[col].max() > 5000:\n", + " step = 2500\n", + " elif summary_df[col].max() > 2000:\n", + " step = 1000\n", + " elif summary_df[col].max() > 800:\n", + " step = 500\n", + " elif summary_df[col].max() > 500:\n", + " step = 100\n", + " elif summary_df[col].max() > 1:\n", + " step = 50\n", + " else:\n", + " step = 1\n", + " \n", " ax.set_xticks(np.arange(0, summary_df[col].max(), step=step))\n", " ax.xaxis.set_major_formatter(ticker.EngFormatter())\n", "\n", @@ -1040,103 +3139,926 @@ " ax.set(title=title)\n", " ax.xaxis.grid(True)\n", " ax.yaxis.grid(True)\n", + " if 'total' not in title:\n", + " # ax.set_xlim(0, 1)\n", + " ax.set_xlabel('Rate')\n", "\n", " sns.despine(left=True, bottom=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 228, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unit_idlang_idmt_tokpe_tokmt_pospe_possame_wordsame_possame_lemmasame_morfsame_deprel
0flores101-main-ukr-100-pe1-1ukrпривідADPADPFalseTrueFalseFalseTrue
1flores101-main-ukr-100-pe1-1ukrвступіповерненняNOUNNOUNFalseTrueFalseFalseFalse
2flores101-main-ukr-100-pe1-1ukrфазифазаNOUNNOUNFalseTrueTrueFalseFalse
3flores101-main-ukr-100-pe1-1ukrбутипроходитиAUXVERBFalseFalseFalseTrueFalse
4flores101-main-ukr-100-pe1-3ukrПовернувшисьПрожившиVERBVERBFalseTrueFalseTrueTrue
....................................
14804flores101-main-vie-48-pe1-3viebằngtrênADPADPFalseTrueFalseTrueTrue
14805flores101-main-vie-48-pe1-3vievận chuyểntàu thuyềnVERBNOUNFalseFalseFalseTrueFalse
14806flores101-main-vie-48-pe1-3viecuộcđoànNOUNNOUNFalseTrueFalseTrueTrue
14807flores101-main-vie-48-pe1-3viethoạitruyềnNOUNVERBFalseFalseFalseTrueTrue
14808flores101-main-vie-48-pe1-4vieđiện thoạithôngNOUNADJFalseFalseFalseTrueFalse
\n", + "

14809 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " unit_id lang_id mt_tok pe_tok mt_pos \\\n", + "0 flores101-main-ukr-100-pe1-1 ukr при від ADP \n", + "1 flores101-main-ukr-100-pe1-1 ukr вступі повернення NOUN \n", + "2 flores101-main-ukr-100-pe1-1 ukr фази фаза NOUN \n", + "3 flores101-main-ukr-100-pe1-1 ukr бути проходити AUX \n", + "4 flores101-main-ukr-100-pe1-3 ukr Повернувшись Проживши VERB \n", + "... ... ... ... ... ... \n", + "14804 flores101-main-vie-48-pe1-3 vie bằng trên ADP \n", + "14805 flores101-main-vie-48-pe1-3 vie vận chuyển tàu thuyền VERB \n", + "14806 flores101-main-vie-48-pe1-3 vie cuộc đoàn NOUN \n", + "14807 flores101-main-vie-48-pe1-3 vie thoại truyền NOUN \n", + "14808 flores101-main-vie-48-pe1-4 vie điện thoại thông NOUN \n", + "\n", + " pe_pos same_word same_pos same_lemma same_morf same_deprel \n", + "0 ADP False True False False True \n", + "1 NOUN False True False False False \n", + "2 NOUN False True True False False \n", + "3 VERB False False False True False \n", + "4 VERB False True False True True \n", + "... ... ... ... ... ... ... \n", + "14804 ADP False True False True True \n", + "14805 NOUN False False False True False \n", + "14806 NOUN False True False True True \n", + "14807 VERB False False False True True \n", + "14808 ADJ False False False True False \n", + "\n", + "[14809 rows x 11 columns]" + ] + }, + "execution_count": 228, + "metadata": {}, + "output_type": "execute_result" + } ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-07-18T17:00:42.468181Z", - "start_time": "2023-07-18T17:00:42.176945Z" + "source": [ + "df_stats" + ] + }, + { + "cell_type": "code", + "execution_count": 229, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unit_idlang_idmt_tbd_qe_tagsOKBAD-SUBBAD-DEL-RBAD-DEL-LBAD-SHFBAD-CONBAD-EXPBAD-INS
0flores101-main-ukr-100-pe1-1ukr{OK}TrueFalseFalseFalseFalseFalseFalseFalse
1flores101-main-ukr-100-pe1-1ukr{BAD-SUB}FalseTrueFalseFalseFalseFalseFalseFalse
2flores101-main-ukr-100-pe1-1ukr{BAD-SUB, BAD-DEL-R}FalseTrueTrueFalseFalseFalseFalseFalse
3flores101-main-ukr-100-pe1-1ukr{OK, BAD-DEL-L}TrueFalseFalseTrueFalseFalseFalseFalse
4flores101-main-ukr-100-pe1-1ukr{OK}TrueFalseFalseFalseFalseFalseFalseFalse
....................................
108478flores101-main-vie-48-pe1-4vie{OK}TrueFalseFalseFalseFalseFalseFalseFalse
108479flores101-main-vie-48-pe1-4vie{OK}TrueFalseFalseFalseFalseFalseFalseFalse
108480flores101-main-vie-48-pe1-4vie{OK}TrueFalseFalseFalseFalseFalseFalseFalse
108481flores101-main-vie-48-pe1-4vie{OK}TrueFalseFalseFalseFalseFalseFalseFalse
108482flores101-main-vie-48-pe1-4vie{OK}TrueFalseFalseFalseFalseFalseFalseFalse
\n", + "

108483 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " unit_id lang_id mt_tbd_qe_tags OK \\\n", + "0 flores101-main-ukr-100-pe1-1 ukr {OK} True \n", + "1 flores101-main-ukr-100-pe1-1 ukr {BAD-SUB} False \n", + "2 flores101-main-ukr-100-pe1-1 ukr {BAD-SUB, BAD-DEL-R} False \n", + "3 flores101-main-ukr-100-pe1-1 ukr {OK, BAD-DEL-L} True \n", + "4 flores101-main-ukr-100-pe1-1 ukr {OK} True \n", + "... ... ... ... ... \n", + "108478 flores101-main-vie-48-pe1-4 vie {OK} True \n", + "108479 flores101-main-vie-48-pe1-4 vie {OK} True \n", + "108480 flores101-main-vie-48-pe1-4 vie {OK} True \n", + "108481 flores101-main-vie-48-pe1-4 vie {OK} True \n", + "108482 flores101-main-vie-48-pe1-4 vie {OK} True \n", + "\n", + " BAD-SUB BAD-DEL-R BAD-DEL-L BAD-SHF BAD-CON BAD-EXP BAD-INS \n", + "0 False False False False False False False \n", + "1 True False False False False False False \n", + "2 True True False False False False False \n", + "3 False False True False False False False \n", + "4 False False False False False False False \n", + "... ... ... ... ... ... ... ... 
\n", + "108478 False False False False False False False \n", + "108479 False False False False False False False \n", + "108480 False False False False False False False \n", + "108481 False False False False False False False \n", + "108482 False False False False False False False \n", + "\n", + "[108483 rows x 11 columns]" + ] + }, + "execution_count": 229, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_error_types" + ] + }, + { + "cell_type": "code", + "execution_count": 230, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "lang_id\n", + "ara 2055\n", + "ita 2368\n", + "nld 1728\n", + "tur 2217\n", + "ukr 4295\n", + "vie 2146\n", + "dtype: int64" + ] + }, + "execution_count": 230, + "metadata": {}, + "output_type": "execute_result" } - } + ], + "source": [ + "_df_stats[_df_stats['BAD-SUB']].groupby(['lang_id']).size()" + ] }, { "cell_type": "code", - "execution_count": 190, + "execution_count": 246, + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "TOTAL:\t 14809\n", - "SAME POS:\t 10820\n", - "DIFF POS:\t 3989\n" + "TOTAL:\t 18206\n" ] }, { "data": { - "text/plain": " total same_pos diff_pos diff_deprel\nlang_id \nara 2055 1230 825 1027\nita 2368 1702 666 861\nnld 1728 1301 427 600\ntur 2217 1685 532 1079\nukr 4295 3425 870 1641\nvie 2146 1477 669 1217\nTOTAL 14809 10820 3989 6425\nAVG 2468 1803 664 1070", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
totalsame_posdiff_posdiff_deprel
lang_id
ara205512308251027
ita23681702666861
nld17281301427600
tur221716855321079
ukr429534258701641
vie214614776691217
TOTAL148091082039896425
AVG246818036641070
\n
" + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
totalBAD-SUBBAD-INSBAD-CONBAD-EXPBAD-SHFBAD-DEL-RBAD-DEL-L
lang_id
ara25280.8129000.000000.04030.1468000.3089000.1642000.13410
ita26360.8983000.000400.02770.0736000.2329000.1756000.13510
nld20670.8360000.000500.04160.1219000.3527000.1592000.14510
tur27670.8012000.000000.05280.1460000.2895000.1395000.10010
ukr50850.8446000.000800.03130.1233000.3650000.1851000.16180
vie31230.6872000.007000.06790.2379000.4143000.2520000.18570
TOTAL182064.8802000.008700.26160.8495001.9633001.0756000.86190
AVG30340.8133670.001450.04360.1415830.3272170.1792670.14365
\n", + "
" + ], + "text/plain": [ + " total BAD-SUB BAD-INS BAD-CON BAD-EXP BAD-SHF BAD-DEL-R \\\n", + "lang_id \n", + "ara 2528 0.812900 0.00000 0.0403 0.146800 0.308900 0.164200 \n", + "ita 2636 0.898300 0.00040 0.0277 0.073600 0.232900 0.175600 \n", + "nld 2067 0.836000 0.00050 0.0416 0.121900 0.352700 0.159200 \n", + "tur 2767 0.801200 0.00000 0.0528 0.146000 0.289500 0.139500 \n", + "ukr 5085 0.844600 0.00080 0.0313 0.123300 0.365000 0.185100 \n", + "vie 3123 0.687200 0.00700 0.0679 0.237900 0.414300 0.252000 \n", + "TOTAL 18206 4.880200 0.00870 0.2616 0.849500 1.963300 1.075600 \n", + "AVG 3034 0.813367 0.00145 0.0436 0.141583 0.327217 0.179267 \n", + "\n", + " BAD-DEL-L \n", + "lang_id \n", + "ara 0.13410 \n", + "ita 0.13510 \n", + "nld 0.14510 \n", + "tur 0.10010 \n", + "ukr 0.16180 \n", + "vie 0.18570 \n", + "TOTAL 0.86190 \n", + "AVG 0.14365 " + ] }, - "execution_count": 190, + "execution_count": 246, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "print('TOTAL:\\t', len(df_stats))\n", - "print('SAME POS:\\t', len(df_stats[df_stats['same_pos']]))\n", - "print('DIFF POS:\\t', len(df_stats[~df_stats['same_pos']]))\n", + "_df_stats = df_error_types[~df_error_types['OK']]\n", "\n", + "print('TOTAL:\\t', len(_df_stats))\n", + "\n", + "_total = _df_stats.groupby(['lang_id']).size()\n", "tmp = pd.DataFrame([\n", - " df_stats.groupby(['lang_id']).size(),\n", - " df_stats[df_stats['same_pos']].groupby(['lang_id']).size(),\n", - " df_stats[~df_stats['same_pos']].groupby(['lang_id']).size(),\n", - " df_stats[~df_stats['same_deprel']].groupby(['lang_id']).size(),\n", + " _total,\n", + " *[ \n", + " _df_stats[_df_stats[i]].groupby(['lang_id']).size() / _total\n", + " for i in ['BAD-SUB', 'BAD-INS', 'BAD-CON', 'BAD-EXP', 'BAD-SHF', 'BAD-DEL-R', 'BAD-DEL-L']\n", + " ],\n", "], index=[\n", " 'total',\n", - " 'same_pos',\n", - " 'diff_pos',\n", - " 'diff_deprel',\n", - "]).fillna(0).astype('int').T\n", - "tmp.loc['TOTAL'] = tmp.sum(numeric_only=True)\n", - 
"tmp.loc['AVG'] = (tmp.loc['TOTAL'] / len(set(df_stats['lang_id']) - {'TOTAL'})).astype('int')\n", + " *['BAD-SUB', 'BAD-INS', 'BAD-CON', 'BAD-EXP', 'BAD-SHF', 'BAD-DEL-R', 'BAD-DEL-L']\n", + "]).fillna(0).T.apply(lambda x: round(x, 4)) #.astype('int').T\n", + "tmp.loc['TOTAL'] = tmp.sum(numeric_only=True).astype('float')\n", + "tmp.loc['AVG'] = (tmp.loc['TOTAL'] / len(set(_df_stats['lang_id']) - {'TOTAL'})).astype('float')\n", + "tmp['total'] = tmp['total'].astype('int')\n", "\n", "# print(tmp)\n", "tmp" + ] + }, + { + "cell_type": "code", + "execution_count": 247, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } ], + "source": [ + "plot_summary_df(tmp, title='BAD-X types')" + ] + }, + { + "cell_type": "code", + "execution_count": 240, "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2023-07-13T11:05:51.373876Z", "start_time": "2023-07-13T11:05:51.325488Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false } - } - }, - { - "cell_type": "code", - "execution_count": 193, + }, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TOTAL:\t 14809\n", + "SAME POS:\t 10820\n", + "DIFF POS:\t 3989\n" + ] + }, { "data": { - "text/plain": " total same_pos diff_pos diff_deprel\nlang_id \nara 2055 1230 825 1027\nita 2368 1702 666 861\nnld 1728 1301 427 600\ntur 2217 1685 532 1079\nukr 4295 3425 870 1641\nvie 2146 1477 669 1217\nTOTAL 14809 10820 3989 6425\nAVG 2468 1803 664 1070", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
totalsame_posdiff_posdiff_deprel
lang_id
ara205512308251027
ita23681702666861
nld17281301427600
tur221716855321079
ukr429534258701641
vie214614776691217
TOTAL148091082039896425
AVG246818036641070
\n
" + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
totalsame_posdiff_posdiff_deprel
lang_id
ara20550.5985000.4015000.499800
ita23680.7188000.2812000.363600
nld17280.7529000.2471000.347200
tur22170.7600000.2400000.486700
ukr42950.7974000.2026000.382100
vie21460.6883000.3117000.567100
TOTAL148094.3159001.6841002.646500
AVG24680.7193170.2806830.441083
\n", + "
" + ], + "text/plain": [ + " total same_pos diff_pos diff_deprel\n", + "lang_id \n", + "ara 2055 0.598500 0.401500 0.499800\n", + "ita 2368 0.718800 0.281200 0.363600\n", + "nld 1728 0.752900 0.247100 0.347200\n", + "tur 2217 0.760000 0.240000 0.486700\n", + "ukr 4295 0.797400 0.202600 0.382100\n", + "vie 2146 0.688300 0.311700 0.567100\n", + "TOTAL 14809 4.315900 1.684100 2.646500\n", + "AVG 2468 0.719317 0.280683 0.441083" + ] }, - "execution_count": 193, + "execution_count": 240, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "_df_stats = df_stats\n", + "\n", + "print('TOTAL:\\t', len(_df_stats))\n", + "print('SAME POS:\\t', len(_df_stats[_df_stats['same_pos']]))\n", + "print('DIFF POS:\\t', len(_df_stats[~_df_stats['same_pos']]))\n", + "\n", + "_total_sub = _df_stats.groupby(['lang_id']).size()\n", + "tmp = pd.DataFrame([\n", + " _df_stats.groupby(['lang_id']).size(),\n", + " _df_stats[_df_stats['same_pos']].groupby(['lang_id']).size() / _total_sub,\n", + " _df_stats[~_df_stats['same_pos']].groupby(['lang_id']).size() / _total_sub,\n", + " _df_stats[~_df_stats['same_deprel']].groupby(['lang_id']).size() / _total_sub,\n", + "], index=[\n", + " 'total',\n", + " 'same_pos',\n", + " 'diff_pos',\n", + " 'diff_deprel',\n", + "]).fillna(0).T.apply(lambda x: round(x, 4)) #.astype('int').T\n", + "tmp.loc['TOTAL'] = tmp.sum(numeric_only=True).astype('float')\n", + "tmp.loc['AVG'] = (tmp.loc['TOTAL'] / len(set(_df_stats['lang_id']) - {'TOTAL'})).astype('float')\n", + "tmp['total'] = tmp['total'].astype('int')\n", + "\n", + "# print(tmp)\n", "tmp" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-07-13T11:06:01.710994Z", - "start_time": "2023-07-13T11:06:01.681673Z" - } - } + ] }, { "cell_type": "code", - "execution_count": 191, + "execution_count": 241, + "metadata": { + "ExecuteTime": { + "end_time": "2023-07-13T11:05:54.004076Z", + "start_time": "2023-07-13T11:05:52.391641Z" + }, + "collapsed": false, + "jupyter": { + 
"outputs_hidden": false + } + }, "outputs": [ { "data": { - "text/plain": "
", - "image/png": "" + "image/png": "", + "text/plain": [ + "
" + ] }, "metadata": {}, "output_type": "display_data" @@ -1144,18 +4066,21 @@ ], "source": [ "plot_summary_df(tmp, title='BAD-SUB types')" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-07-13T11:05:54.004076Z", - "start_time": "2023-07-13T11:05:52.391641Z" - } - } + ] }, { "cell_type": "code", - "execution_count": 182, + "execution_count": 238, + "metadata": { + "ExecuteTime": { + "end_time": "2023-07-13T10:37:02.689878Z", + "start_time": "2023-07-13T10:37:02.633850Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "name": "stdout", @@ -1166,10 +4091,144 @@ }, { "data": { - "text/plain": " total (same pos) same_lemma same_morf same_lemma diff_morf \\\nlang_id \nara 1230 93 374 \nita 1702 100 500 \nnld 1301 153 100 \ntur 1685 115 623 \nukr 3425 128 1164 \nvie 1477 0 0 \nTOTAL 10820 589 2761 \nAVG 1803 98 460 \n\n diff_lemma same_morf diff_lemma diff_morf same_word (diff case) \nlang_id \nara 498 265 0 \nita 705 397 68 \nnld 867 181 114 \ntur 604 343 79 \nukr 1067 1066 124 \nvie 1468 9 160 \nTOTAL 5209 2261 545 \nAVG 868 376 90 ", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
total (same pos)same_lemma same_morfsame_lemma diff_morfdiff_lemma same_morfdiff_lemma diff_morfsame_word (diff case)
lang_id
ara1230933744982650
ita170210050070539768
nld1301153100867181114
tur168511562360434379
ukr3425128116410671066124
vie14770014689160
TOTAL10820589276152092261545
AVG18039846086837690
\n
" + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
total (same pos)same_lemma same_morfsame_lemma diff_morfdiff_lemma same_morfdiff_lemma diff_morfsame_word (diff case)
lang_id
ara20550.07560.3041000.40490.2154000.000000
ita23680.05880.2938000.41420.2333000.040000
nld17280.11760.0769000.66640.1391000.087600
tur22170.06820.3697000.35850.2036000.046900
ukr42950.03740.3399000.31150.3112000.036200
vie21460.00000.0000000.99390.0061000.108300
TOTAL148090.35761.3844003.14941.1087000.319000
AVG24680.05960.2307330.52490.1847830.053167
\n", + "
" + ], + "text/plain": [ + " total (same pos) same_lemma same_morf same_lemma diff_morf \\\n", + "lang_id \n", + "ara 2055 0.0756 0.304100 \n", + "ita 2368 0.0588 0.293800 \n", + "nld 1728 0.1176 0.076900 \n", + "tur 2217 0.0682 0.369700 \n", + "ukr 4295 0.0374 0.339900 \n", + "vie 2146 0.0000 0.000000 \n", + "TOTAL 14809 0.3576 1.384400 \n", + "AVG 2468 0.0596 0.230733 \n", + "\n", + " diff_lemma same_morf diff_lemma diff_morf same_word (diff case) \n", + "lang_id \n", + "ara 0.4049 0.215400 0.000000 \n", + "ita 0.4142 0.233300 0.040000 \n", + "nld 0.6664 0.139100 0.087600 \n", + "tur 0.3585 0.203600 0.046900 \n", + "ukr 0.3115 0.311200 0.036200 \n", + "vie 0.9939 0.006100 0.108300 \n", + "TOTAL 3.1494 1.108700 0.319000 \n", + "AVG 0.5249 0.184783 0.053167 " + ] }, - "execution_count": 182, + "execution_count": 238, "metadata": {}, "output_type": "execute_result" } @@ -1178,13 +4237,14 @@ "_df_stats = df_stats[df_stats['same_pos']]\n", "print('TOTAL:\\t', len(_df_stats))\n", "\n", + "_total = _df_stats.groupby(['lang_id']).size()\n", "tmp = pd.DataFrame([\n", - " _df_stats.groupby(['lang_id']).size(),\n", - " _df_stats[_df_stats['same_lemma'] & _df_stats['same_morf']].groupby(['lang_id']).size(),\n", - " _df_stats[_df_stats['same_lemma'] & ~_df_stats['same_morf']].groupby(['lang_id']).size(),\n", - " _df_stats[~_df_stats['same_lemma'] & _df_stats['same_morf']].groupby(['lang_id']).size(),\n", - " _df_stats[~_df_stats['same_lemma'] & ~_df_stats['same_morf']].groupby(['lang_id']).size(),\n", - " _df_stats[_df_stats['same_word']].groupby(['lang_id']).size(),\n", + " _total_sub,\n", + " _df_stats[_df_stats['same_lemma'] & _df_stats['same_morf']].groupby(['lang_id']).size() / _total,\n", + " _df_stats[_df_stats['same_lemma'] & ~_df_stats['same_morf']].groupby(['lang_id']).size() / _total,\n", + " _df_stats[~_df_stats['same_lemma'] & _df_stats['same_morf']].groupby(['lang_id']).size() / _total,\n", + " _df_stats[~_df_stats['same_lemma'] & 
~_df_stats['same_morf']].groupby(['lang_id']).size() / _total,\n", + " _df_stats[_df_stats['same_word']].groupby(['lang_id']).size() / _total,\n", "], index=[\n", " 'total (same pos)',\n", " 'same_lemma same_morf',\n", @@ -1192,29 +4252,35 @@ " 'diff_lemma same_morf',\n", " 'diff_lemma diff_morf',\n", " 'same_word (diff case)',\n", - "]).fillna(0).astype('int').T\n", - "tmp.loc['TOTAL'] = tmp.sum(numeric_only=True)\n", - "tmp.loc['AVG'] = (tmp.loc['TOTAL'] / len(set(df_stats['lang_id']) - {'TOTAL'})).astype('int')\n", + "]).fillna(0).T.apply(lambda x: round(x, 4)) #.astype('int').T\n", + "tmp.loc['TOTAL'] = tmp.sum(numeric_only=True).astype('float')\n", + "tmp.loc['AVG'] = (tmp.loc['TOTAL'] / len(set(_df_stats['lang_id']) - {'TOTAL'})).astype('float')\n", + "tmp['total (same pos)'] = tmp['total (same pos)'].astype('int')\n", "\n", "# print(tmp)\n", "tmp" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-07-13T10:37:02.689878Z", - "start_time": "2023-07-13T10:37:02.633850Z" - } - } + ] }, { "cell_type": "code", - "execution_count": 183, + "execution_count": 239, + "metadata": { + "ExecuteTime": { + "end_time": "2023-07-13T10:37:11.466933Z", + "start_time": "2023-07-13T10:37:08.060140Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [ { "data": { - "text/plain": "
", - "image/png": "" + "image/png": "", + "text/plain": [ + "
" + ] }, "metadata": {}, "output_type": "display_data" @@ -1222,53 +4288,73 @@ ], "source": [ "plot_summary_df(tmp, title='BAD-SUB same_pos types')" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-07-13T10:37:11.466933Z", - "start_time": "2023-07-13T10:37:08.060140Z" - } - } + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [], "metadata": { - "collapsed": false - } + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ "---" - ], - "metadata": { - "collapsed": false - } + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.9.13" } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 4 }