From b7e80e030f717559dae007d22db9322d9f20ef38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gregor=20W=C3=BCnsch?= <41543402+gwuen@users.noreply.github.com> Date: Thu, 19 Jun 2025 15:40:26 +0200 Subject: [PATCH] =?UTF-8?q?chore:=20remove=20invalid=20`=E2=80=93psm=200`?= =?UTF-8?q?=20option=20from=20tessdata=5Fconfig?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `–psm 0` option was ignored by Tesseract because it used an en-dash instead of a hyphen, and only a single dash instead of the required double dash. Using the correct `--psm 0` is also not appropriate, as it triggers "Orientation and script detection (OSD) only", which does not return any text and causes frog to fail. As a result, the behavior remains unchanged and continues to default to "Fully automatic page segmentation, but no OSD". --- frog/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frog/config.py b/frog/config.py index ccf9aab..d4a1453 100644 --- a/frog/config.py +++ b/frog/config.py @@ -41,4 +41,4 @@ tessdata_url = "https://github.com/tesseract-ocr/tessdata/raw/main/" tessdata_best_url = "https://github.com/tesseract-ocr/tessdata_best/raw/main/" tessdata_dir = os.path.join(os.environ['XDG_DATA_HOME'], 'tessdata') -tessdata_config = f'--tessdata-dir {tessdata_dir} –psm 0 --oem 1' +tessdata_config = f'--tessdata-dir {tessdata_dir} --oem 1'