From 1bebb7970ff74c3baf5e3f39b0d914e08178475f Mon Sep 17 00:00:00 2001 From: hnikolov Date: Wed, 11 Mar 2026 09:40:03 +0000 Subject: [PATCH 01/19] integrating xray and stopwords --- redactor/core/redaction/file_processor.py | 35 +++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/redactor/core/redaction/file_processor.py b/redactor/core/redaction/file_processor.py index 8a811c3c..c950c7fa 100644 --- a/redactor/core/redaction/file_processor.py +++ b/redactor/core/redaction/file_processor.py @@ -1,5 +1,6 @@ import json import pymupdf +import os from typing import Set, Type, List, Any, Dict, Tuple from abc import ABC, abstractmethod @@ -7,6 +8,7 @@ from PIL import Image from pydantic import BaseModel from itertools import chain +from yaml import safe_load from core.redaction.redactor import ( Redactor, @@ -31,6 +33,7 @@ from core.util.logging_util import LoggingUtil, log_to_appins from core.util.types import PydanticImage import dataclasses +import xray class FileProcessor(ABC): @@ -195,6 +198,28 @@ def _extract_pdf_text(self, file_bytes: BytesIO) -> str: return None return "\n".join(page for page in pages) + def _find_bad_redactions(self, file_bytes: BytesIO): + """ + Return a list of bad redactions in the give PDF + + :param BytesIO file_bytes: Bytes stream for the PDF + :return List[]: the bad redaction strings + """ + pdf = pymupdf.open(stream=file_bytes) + bad_redactions = xray.inspect(pdf) + bad_redactions_list = [item["text"] for items in bad_redactions.values() for item in items] + return bad_redactions_list + + def _load_stopwords(self): + """ + Check the text_to_redact list against the list in the stopwords yaml + + :return List[]: the bad redaction strings + """ + stopwords = safe_load(open(os.path.join("config", "stopwords.yaml"), "r")) + stopword_list = stopwords["stopwords"] + return stopword_list + def _extract_pdf_images(self, file_bytes: BytesIO): """ Return the images of the given PDF as a list of PDFImageMetadata objects @@ -1014,6 +1039,16 @@ def redact( for result in text_redaction_results for redaction_string in result.redaction_strings ] + # Add bad redactions to the text redaction list + pdf = pymupdf.open(stream=file_bytes) + bad_redactions = xray.inspect(pdf) + bad_redactions_list = [item["text"] for items in bad_redactions.values() for item in items] + text_redactions = text_redactions + bad_redactions_list + # Remove stopwords from text redaction list + stopwords = safe_load(open(os.path.join("config", "stopwords.yaml"), "r")) + stopword_list = stopwords["stopwords"] + text_redactions = text_redactions - stopword_list + image_redaction_results: List[ImageRedactionResult] = [ x for x in redaction_results From 6663c907e3c90df1e1bd7f62d81b7e4a2d4eab8b Mon Sep 17 00:00:00 2001 From: hnikolov Date: Fri, 13 Mar 2026 13:10:24 +0000 Subject: [PATCH 02/19] xray + redactions functions --- redactor/core/redaction/file_processor.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/redactor/core/redaction/file_processor.py b/redactor/core/redaction/file_processor.py index c950c7fa..36d28b74 100644 --- a/redactor/core/redaction/file_processor.py +++ b/redactor/core/redaction/file_processor.py @@ -1040,13 +1040,10 @@ def redact( for redaction_string in result.redaction_strings ] # Add bad redactions to the text redaction list - pdf = pymupdf.open(stream=file_bytes) - bad_redactions = xray.inspect(pdf) - bad_redactions_list = [item["text"] for items in bad_redactions.values() for item in items] + bad_redactions_list = self._find_bad_redactions(file_bytes) text_redactions = text_redactions + bad_redactions_list # Remove stopwords from text redaction list - stopwords = safe_load(open(os.path.join("config", "stopwords.yaml"), "r")) - stopword_list = stopwords["stopwords"] + stopword_list = self._load_stopwords() text_redactions = text_redactions - stopword_list image_redaction_results: List[ImageRedactionResult] = [ From 64013e774e08e26e6bbcfea9532633fcf49ccbc2 Mon Sep 17 00:00:00 2001 From: hnikolov Date: Fri, 13 Mar 2026 13:23:53 +0000 Subject: [PATCH 03/19] requirements updated --- redactor/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/redactor/requirements.txt b/redactor/requirements.txt index 20a84b2b..eabf86fd 100644 --- a/redactor/requirements.txt +++ b/redactor/requirements.txt @@ -29,4 +29,5 @@ PyYAML==6.0.3 ruff==0.14.7 tiktoken==0.12.0 unidecode==1.4.0 -StrEnum==0.4.15 # Not ideal, but this is needed due to compatibility issues between ADO agents and the Function App \ No newline at end of file +StrEnum==0.4.15 # Not ideal, but this is needed due to compatibility issues between ADO agents and the Function App +xray==0.7.0 \ No newline at end of file From a059cd68c7a22eb2d0ef621e6c47aad8f247a4d8 Mon Sep 17 00:00:00 2001 From: hnikolov Date: Mon, 16 Mar 2026 08:48:18 +0000 Subject: [PATCH 04/19] Unit tests added --- .../file_processor/test_pdf_processor.py | 41 ++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/redactor/test/unit_test/redaction/file_processor/test_pdf_processor.py b/redactor/test/unit_test/redaction/file_processor/test_pdf_processor.py index 64edb06d..a8563046 100644 --- a/redactor/test/unit_test/redaction/file_processor/test_pdf_processor.py +++ b/redactor/test/unit_test/redaction/file_processor/test_pdf_processor.py @@ -3,7 +3,8 @@ from PIL import Image from io import BytesIO -from mock import patch, Mock +import mock +from mock import patch, Mock, MagicMock from core.redaction.file_processor import ( PDFProcessor, @@ -960,3 +961,41 @@ def test__pdf_processor__apply(): assert expected_image == actual_image, ( "Expected the image in the pdf to be redacted, but it did not match the redacted sample" ) + +def test_find_bad_redactions(): + """ + - Given i have a pdf file with some content + - When i call PDFProcessor._find_bad_redactions + - The content is returned as a list + """ + file_bytes = BytesIO(b"fake pdf bytes") + mock_pdf = MagicMock() + mock_inspect_result = { + "page1": [{"text": "secret"}, {"text": "password"}], + "page2": [{"text": "token"}], + } + with patch("pymupdf.open", return_value=mock_pdf) as mock_open: + with patch("xray.inspect", return_value=mock_inspect_result) as mock_inspect: + obj = PDFProcessor() + result = obj._find_bad_redactions(file_bytes) + + assert result == ["secret", "password", "token"] + mock_open.assert_called_once_with(stream=file_bytes) + mock_inspect.assert_called_once_with(mock_pdf) + +def test_load_stopwords(): + """ + - Given i have a yaml file with some content + - When i call PDFProcessor._load_stopwords + - The yaml content is returned as a list + """ + mock_config_file_content = """ + stopwords: + - the + - test + """ + expected_output = ["the","test"] + with mock.patch( + "builtins.open", mock.mock_open(read_data=mock_config_file_content) + ): + assert PDFProcessor._load_stopwords("some_file") == expected_output \ No newline at end of file From f68ac0def2426a53ae86bb436de37a221fe6b947 Mon Sep 17 00:00:00 2001 From: hnikolov Date: Mon, 16 Mar 2026 10:22:42 +0000 Subject: [PATCH 05/19] Adding collections.abc to reqs to attempt import error resolution --- redactor/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/redactor/requirements.txt b/redactor/requirements.txt index eabf86fd..cba09064 100644 --- a/redactor/requirements.txt +++ b/redactor/requirements.txt @@ -30,4 +30,5 @@ ruff==0.14.7 tiktoken==0.12.0 unidecode==1.4.0 StrEnum==0.4.15 # Not ideal, but this is needed due to compatibility issues between ADO agents and the Function App -xray==0.7.0 \ No newline at end of file +xray==0.7.0 +pycopy-collections.abc==0.0.0 # Dummy as this should be included in standard packages \ No newline at end of file From 4b7ae806a3863d404dcf5b3cdd79ac2bdc27b3cf Mon Sep 17 00:00:00 2001 From: hnikolov Date: Mon, 16 Mar 2026 16:00:35 +0000 Subject: [PATCH 06/19] Stopwords yaml --- redactor/config/stopwords.yaml | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 redactor/config/stopwords.yaml diff --git a/redactor/config/stopwords.yaml b/redactor/config/stopwords.yaml new file mode 100644 index 00000000..076bba37 --- /dev/null +++ b/redactor/config/stopwords.yaml @@ -0,0 +1,3 @@ +stopwords: + - "the" + - "my" \ No newline at end of file From 5bf767028d74eb4b740d7120f7feecd4d4288449 Mon Sep 17 00:00:00 2001 From: hnikolov Date: Tue, 17 Mar 2026 14:47:35 +0000 Subject: [PATCH 07/19] removed unused import --- redactor/core/redaction/file_processor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/redactor/core/redaction/file_processor.py b/redactor/core/redaction/file_processor.py index 09efe604..828ae2e6 100644 --- a/redactor/core/redaction/file_processor.py +++ b/redactor/core/redaction/file_processor.py @@ -35,7 +35,6 @@ from core.util.text_util import is_english_text, get_normalised_words, normalise_text from core.util.logging_util import LoggingUtil, log_to_appins from core.util.types import PydanticImage -import dataclasses import xray from core.util.metric_util import MetricUtil From b184c2e53dcd1bc0d4dac252cbab28753e1dc4b7 Mon Sep 17 00:00:00 2001 From: hnikolov Date: Wed, 18 Mar 2026 15:18:45 +0000 Subject: [PATCH 08/19] x-ray req file updated --- redactor/core/redaction/file_processor.py | 1 + redactor/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/redactor/core/redaction/file_processor.py b/redactor/core/redaction/file_processor.py index 828ae2e6..a34c8349 100644 --- a/redactor/core/redaction/file_processor.py +++ b/redactor/core/redaction/file_processor.py @@ -245,6 +245,7 @@ def _find_bad_redactions(self, file_bytes: BytesIO): pdf = pymupdf.open(stream=file_bytes) bad_redactions = xray.inspect(pdf) bad_redactions_list = [item["text"] for items in bad_redactions.values() for item in items] + print(bad_redactions_list) return bad_redactions_list def _load_stopwords(self): diff --git a/redactor/requirements.txt b/redactor/requirements.txt index cba09064..fccf053d 100644 --- a/redactor/requirements.txt +++ b/redactor/requirements.txt @@ -30,5 +30,5 @@ ruff==0.14.7 tiktoken==0.12.0 unidecode==1.4.0 StrEnum==0.4.15 # Not ideal, but this is needed due to compatibility issues between ADO agents and the Function App -xray==0.7.0 +x-ray==0.3.6 pycopy-collections.abc==0.0.0 # Dummy as this should be included in standard packages \ No newline at end of file From d19632a104d0df4b5ad43f9cde07aa002c18efbb Mon Sep 17 00:00:00 2001 From: hnikolov Date: Wed, 18 Mar 2026 15:33:44 +0000 Subject: [PATCH 09/19] Amend to add seek command to function - should work correctly now --- redactor/core/redaction/file_processor.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/redactor/core/redaction/file_processor.py b/redactor/core/redaction/file_processor.py index a34c8349..2f0b10ff 100644 --- a/redactor/core/redaction/file_processor.py +++ b/redactor/core/redaction/file_processor.py @@ -238,14 +238,12 @@ def _extract_pdf_text(self, file_bytes: BytesIO) -> str: def _find_bad_redactions(self, file_bytes: BytesIO): """ Return a list of bad redactions in the give PDF - :param BytesIO file_bytes: Bytes stream for the PDF :return List[]: the bad redaction strings """ - pdf = pymupdf.open(stream=file_bytes) - bad_redactions = xray.inspect(pdf) + file_bytes.seek(0) + bad_redactions = xray.inspect(file_bytes.read()) bad_redactions_list = [item["text"] for items in bad_redactions.values() for item in items] - print(bad_redactions_list) return bad_redactions_list def _load_stopwords(self): From 96696dea555134f4172afd2aec1f3fadd64762df Mon Sep 17 00:00:00 2001 From: Shannon Williams Date: Fri, 13 Mar 2026 13:51:32 +0000 Subject: [PATCH 10/19] Correct comparison and output analytics (#74) Switch "apply" and "redact" names in analytics output file. Make sure annotations are treated as positive predictions if `isRedactionCandidate` is `True`. Remove `pandas` to make unit tests pass and improve robustness --- redactor/core/redaction_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/redactor/core/redaction_manager.py b/redactor/core/redaction_manager.py index c17904fb..c2567631 100644 --- a/redactor/core/redaction_manager.py +++ b/redactor/core/redaction_manager.py @@ -353,7 +353,7 @@ def _compare_redactions( proposed_candidates = [ {k: v for k, v in ann.items() if k in attrs_to_compare} for ann in proposed_annots_on_page - if ann.get("isRedactionCandidate", False) + if ann.get("isRedactionCandidate", True) ] n_proposed_redactions += len(proposed_candidates) From e9948171cc5870a5b05b5acf559ed0dbd19296cf Mon Sep 17 00:00:00 2001 From: hnikolov Date: Wed, 18 Mar 2026 15:18:45 +0000 Subject: [PATCH 11/19] x-ray req file updated --- redactor/core/redaction/file_processor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/redactor/core/redaction/file_processor.py b/redactor/core/redaction/file_processor.py index db0f5189..0435208f 100644 --- a/redactor/core/redaction/file_processor.py +++ b/redactor/core/redaction/file_processor.py @@ -271,6 +271,7 @@ def _find_bad_redactions(self, file_bytes: BytesIO): file_bytes.seek(0) bad_redactions = xray.inspect(file_bytes.read()) bad_redactions_list = [item["text"] for items in bad_redactions.values() for item in items] + print(bad_redactions_list) return bad_redactions_list def _load_stopwords(self): From cd5df3ceee0d088c04282b515c79506b3a1a332f Mon Sep 17 00:00:00 2001 From: hnikolov Date: Wed, 18 Mar 2026 15:33:44 +0000 Subject: [PATCH 12/19] Amend to add seek command to function - should work correctly now --- redactor/core/redaction/file_processor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/redactor/core/redaction/file_processor.py b/redactor/core/redaction/file_processor.py index 0435208f..db0f5189 100644 --- a/redactor/core/redaction/file_processor.py +++ b/redactor/core/redaction/file_processor.py @@ -271,7 +271,6 @@ def _find_bad_redactions(self, file_bytes: BytesIO): file_bytes.seek(0) bad_redactions = xray.inspect(file_bytes.read()) bad_redactions_list = [item["text"] for items in bad_redactions.values() for item in items] - print(bad_redactions_list) return bad_redactions_list def _load_stopwords(self): From 5c1e4ea1867b3cd5d7628199fb677342832f0e34 Mon Sep 17 00:00:00 2001 From: hnikolov Date: Tue, 24 Mar 2026 15:58:44 +0000 Subject: [PATCH 13/19] ruff formatting --- redactor/core/redaction/file_processor.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/redactor/core/redaction/file_processor.py b/redactor/core/redaction/file_processor.py index db0f5189..5c637d57 100644 --- a/redactor/core/redaction/file_processor.py +++ b/redactor/core/redaction/file_processor.py @@ -270,10 +270,12 @@ def _find_bad_redactions(self, file_bytes: BytesIO): """ file_bytes.seek(0) bad_redactions = xray.inspect(file_bytes.read()) - bad_redactions_list = [item["text"] for items in bad_redactions.values() for item in items] + bad_redactions_list = [ + item["text"] for items in bad_redactions.values() for item in items + ] return bad_redactions_list - - def _load_stopwords(self): + + def _load_stopwords(self): """ Check the text_to_redact list against the list in the stopwords yaml From 2bc837f42ed04937de290876f12392f5996ae017 Mon Sep 17 00:00:00 2001 From: hnikolov Date: Tue, 24 Mar 2026 16:05:39 +0000 Subject: [PATCH 14/19] Attempt wthout pycopy-collections --- redactor/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/redactor/requirements.txt b/redactor/requirements.txt index 6ea1b3d1..e933c7c7 100644 --- a/redactor/requirements.txt +++ b/redactor/requirements.txt @@ -31,5 +31,5 @@ tiktoken==0.12.0 unidecode==1.4.0 StrEnum==0.4.15 # Not ideal, but this is needed due to compatibility issues between ADO agents and the Function App x-ray==0.3.6 -pycopy-collections.abc==0.0.0 # Dummy as this should be included in standard packages +# pycopy-collections.abc==0.0.0 # Dummy as this should be included in standard packages numpy==2.2.6 From 311283e43334f0f622788686f821346a57bf5a5f Mon Sep 17 00:00:00 2001 From: hnikolov Date: Tue, 24 Mar 2026 16:16:04 +0000 Subject: [PATCH 15/19] ruff fix --- redactor/core/redaction/file_processor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/redactor/core/redaction/file_processor.py b/redactor/core/redaction/file_processor.py index 5c637d57..353be7ca 100644 --- a/redactor/core/redaction/file_processor.py +++ b/redactor/core/redaction/file_processor.py @@ -10,9 +10,8 @@ from io import BytesIO from PIL import Image from pydantic import BaseModel -from itertools import chain from yaml import safe_load -from pydantic import BaseModel, Field +from pydantic import Field from time import time from datetime import datetime From 5761e50b77d66120ecaa9d0155626a2e118ac747 Mon Sep 17 00:00:00 2001 From: hnikolov Date: Tue, 24 Mar 2026 16:27:43 +0000 Subject: [PATCH 16/19] Removing bad redactions, leaving stopwords --- redactor/core/redaction/file_processor.py | 17 ------------- redactor/requirements.txt | 2 -- .../file_processor/test_pdf_processor.py | 24 ++----------------- 3 files changed, 2 insertions(+), 41 deletions(-) diff --git a/redactor/core/redaction/file_processor.py b/redactor/core/redaction/file_processor.py index 353be7ca..5cb611b2 100644 --- a/redactor/core/redaction/file_processor.py +++ b/redactor/core/redaction/file_processor.py @@ -37,7 +37,6 @@ from core.util.text_util import is_english_text, get_normalised_words, normalise_text from core.util.logging_util import LoggingUtil, log_to_appins from core.util.types import PydanticImage -import xray from core.util.metric_util import MetricUtil @@ -261,19 +260,6 @@ def _extract_pdf_text(self, file_bytes: BytesIO) -> str: return None return "\n".join(page for page in pages) - def _find_bad_redactions(self, file_bytes: BytesIO): - """ - Return a list of bad redactions in the give PDF - :param BytesIO file_bytes: Bytes stream for the PDF - :return List[]: the bad redaction strings - """ - file_bytes.seek(0) - bad_redactions = xray.inspect(file_bytes.read()) - bad_redactions_list = [ - item["text"] for items in bad_redactions.values() for item in items - ] - return bad_redactions_list - def _load_stopwords(self): """ Check the text_to_redact list against the list in the stopwords yaml @@ -1255,9 +1241,6 @@ def redact( for result in text_redaction_results for redaction_string in result.redaction_strings ] - # Add bad redactions to the text redaction list - bad_redactions_list = self._find_bad_redactions(file_bytes) - text_redactions = text_redactions + bad_redactions_list # Remove stopwords from text redaction list stopword_list = self._load_stopwords() text_redactions = text_redactions - stopword_list diff --git a/redactor/requirements.txt b/redactor/requirements.txt index e933c7c7..69672134 100644 --- a/redactor/requirements.txt +++ b/redactor/requirements.txt @@ -30,6 +30,4 @@ ruff==0.14.7 tiktoken==0.12.0 unidecode==1.4.0 StrEnum==0.4.15 # Not ideal, but this is needed due to compatibility issues between ADO agents and the Function App -x-ray==0.3.6 -# pycopy-collections.abc==0.0.0 # Dummy as this should be included in standard packages numpy==2.2.6 diff --git a/redactor/test/unit_test/redaction/file_processor/test_pdf_processor.py b/redactor/test/unit_test/redaction/file_processor/test_pdf_processor.py index 9c6f3f53..7261ae58 100644 --- a/redactor/test/unit_test/redaction/file_processor/test_pdf_processor.py +++ b/redactor/test/unit_test/redaction/file_processor/test_pdf_processor.py @@ -1231,26 +1231,6 @@ def test__pdf_processor__apply(): "Expected the image in the pdf to be redacted, but it did not match the redacted sample" ) -def test_find_bad_redactions(): - """ - - Given i have a pdf file with some content - - When i call PDFProcessor._find_bad_redactions - - The content is returned as a list - """ - file_bytes = BytesIO(b"fake pdf bytes") - mock_pdf = MagicMock() - mock_inspect_result = { - "page1": [{"text": "secret"}, {"text": "password"}], - "page2": [{"text": "token"}], - } - with patch("pymupdf.open", return_value=mock_pdf) as mock_open: - with patch("xray.inspect", return_value=mock_inspect_result) as mock_inspect: - obj = PDFProcessor() - result = obj._find_bad_redactions(file_bytes) - - assert result == ["secret", "password", "token"] - mock_open.assert_called_once_with(stream=file_bytes) - mock_inspect.assert_called_once_with(mock_pdf) def test_load_stopwords(): """ @@ -1263,8 +1243,8 @@ def test_load_stopwords(): - the - test """ - expected_output = ["the","test"] + expected_output = ["the", "test"] with mock.patch( "builtins.open", mock.mock_open(read_data=mock_config_file_content) ): - assert PDFProcessor._load_stopwords("some_file") == expected_output \ No newline at end of file + assert PDFProcessor._load_stopwords("some_file") == expected_output From a30aaf68d80a43cedc34f0dd17324b2bfee52e1b Mon Sep 17 00:00:00 2001 From: hnikolov Date: Tue, 24 Mar 2026 16:48:57 +0000 Subject: [PATCH 17/19] Unit test failed, fix applied --- redactor/core/redaction/file_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/redactor/core/redaction/file_processor.py b/redactor/core/redaction/file_processor.py index 5cb611b2..1b55c934 100644 --- a/redactor/core/redaction/file_processor.py +++ b/redactor/core/redaction/file_processor.py @@ -1243,7 +1243,7 @@ def redact( ] # Remove stopwords from text redaction list stopword_list = self._load_stopwords() - text_redactions = text_redactions - stopword_list + text_redactions = np.array(text_redactions) - np.array(stopword_list) image_redaction_results: List[ImageRedactionResult] = [ x From fbf8f024559ea0f416c42ba50a7b570bdf671fd1 Mon Sep 17 00:00:00 2001 From: hnikolov Date: Tue, 24 Mar 2026 16:54:38 +0000 Subject: [PATCH 18/19] fix attempt 2 --- redactor/core/redaction/file_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/redactor/core/redaction/file_processor.py b/redactor/core/redaction/file_processor.py index 1b55c934..b85d7683 100644 --- a/redactor/core/redaction/file_processor.py +++ b/redactor/core/redaction/file_processor.py @@ -1243,7 +1243,7 @@ def redact( ] # Remove stopwords from text redaction list stopword_list = self._load_stopwords() - text_redactions = np.array(text_redactions) - np.array(stopword_list) + text_redactions = list(set(text_redactions) - set(stopword_list)) image_redaction_results: List[ImageRedactionResult] = [ x From 7b3e72d24b8f3f71e6a09b0d9238b270df22f35c Mon Sep 17 00:00:00 2001 From: hnikolov Date: Wed, 25 Mar 2026 12:12:03 +0000 Subject: [PATCH 19/19] Reverting unwanted change --- redactor/core/redaction_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/redactor/core/redaction_manager.py b/redactor/core/redaction_manager.py index c2567631..c17904fb 100644 --- a/redactor/core/redaction_manager.py +++ b/redactor/core/redaction_manager.py @@ -353,7 +353,7 @@ def _compare_redactions( proposed_candidates = [ {k: v for k, v in ann.items() if k in attrs_to_compare} for ann in proposed_annots_on_page - if ann.get("isRedactionCandidate", True) + if ann.get("isRedactionCandidate", False) ] n_proposed_redactions += len(proposed_candidates)