From 702926eee3c6083014a168ee8b276bb51f79b2a6 Mon Sep 17 00:00:00 2001 From: Riddhimaan-Senapati Date: Thu, 5 Jun 2025 12:38:49 -0400 Subject: [PATCH] Fixed a hidden logging bug in word_count.py in add_tokenized_doc --- CHANGELOG.md | 2 +- src/cdstemplate/word_count.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d583675..f523940 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ You should also add project tags for each release in Github, see [Managing relea ## [Unreleased] ### Changed - GitHub workflow for linting and formatting uses ruff as a separate job - +- Fixed logging bug in `add_tokenized_doc` in `word_count.py` ### Removed - GitHub action to run flake8 for linting in build - Removed wildcard from corpus-counter script dependency diff --git a/src/cdstemplate/word_count.py b/src/cdstemplate/word_count.py index 34521a5..8834bab 100644 --- a/src/cdstemplate/word_count.py +++ b/src/cdstemplate/word_count.py @@ -60,7 +60,7 @@ def add_tokenized_doc(self, token_list): logger.info("Adding %s token(s) case insensitively", len(token_list)) self.token_counter.update([w.lower() for w in non_empty_tokens]) else: - logger.info("Adding %s token(s) case insensitively", len(token_list)) + logger.info("Adding %s token(s) case sensitively", len(token_list)) self.token_counter.update(non_empty_tokens) after_vocab_size = self.get_vocab_size()