From 882d2fd2cf49620a3872f739addf80a09fe9b1a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gregor=20W=C3=BCnsch?= <41543402+gwuen@users.noreply.github.com>
Date: Thu, 19 Jun 2025 14:22:17 +0200
Subject: [PATCH] fix: handle unrecognized language codes gracefully

Previously, the language list would fail to load if the tessdata
directory contained any files with unrecognized language codes, as
sorting would raise an exception for unknown languages.

The check for valid codes, which was already implemented in
`get_downloaded_languages()`, has been moved to be done earlier in
`get_downloaded_codes()` to prevent these errors. Now, any unrecognized
codes are ignored, with a warning message logged to the console.
---
 frog/language_manager.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/frog/language_manager.py b/frog/language_manager.py
index e042ddc..c9e9f6f 100644
--- a/frog/language_manager.py
+++ b/frog/language_manager.py
@@ -232,14 +232,18 @@ def get_downloaded_codes(self, force: bool = False) -> List[str]:
                                       for lang_file in os.listdir(tessdata_dir)]
             self._need_update_cache = False
             logger.debug(f"Cache downloaded codes: {self._downloaded_codes}")
-        return sorted(self._downloaded_codes, key=lambda x: self.get_language(x))
+
+        recognized_codes = []
+        for code in self._downloaded_codes:
+            if code not in self._languages:
+                logger.warning(f'Unrecognized language code: {code}')
+                continue
+            recognized_codes.append(code)
+
+        return sorted(recognized_codes, key=lambda x: self.get_language(x))
 
     def get_downloaded_languages(self, force: bool = False) -> List[str]:
-        languages = []
-        for code in self.get_downloaded_codes(force):
-            if code in self._languages:
-                languages.append(self.get_language(code))
-        return sorted(languages)
+        return sorted({self.get_language(code) for code in self.get_downloaded_codes(force)})
 
     def download(self, code):
         self.emit('added', code)