From 016f911021f795ec01710b860aa08fa0ce7e0a6e Mon Sep 17 00:00:00 2001 From: Issa Memari Date: Wed, 18 May 2022 11:34:22 +0200 Subject: [PATCH] Fix locale setting in tokenization --- polyglot/tokenize/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polyglot/tokenize/base.py b/polyglot/tokenize/base.py index c1dcb61..b9c79c7 100644 --- a/polyglot/tokenize/base.py +++ b/polyglot/tokenize/base.py @@ -11,7 +11,7 @@ class Breaker(object): """ Base class to segment text.""" def __init__(self, locale): - self.locale = Locale('locale') + self.locale = Locale(locale) self.breaker = None def transform(self, sequence): @@ -23,7 +23,7 @@ def transform(self, sequence): seq.idx.extend([offset+x for x in self.breaker]) return seq - + class SentenceTokenizer(Breaker): """ Segment text to sentences. """