From d1cb0d9a9f9d3e5f0f0945c3c0bf9be3eb206d1b Mon Sep 17 00:00:00 2001 From: Ivan Chen Date: Fri, 9 Nov 2018 12:24:25 -0500 Subject: [PATCH 1/2] forced to use hex byte encoding --- pdf_redactor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pdf_redactor.py b/pdf_redactor.py index fc04b1b..1027de3 100644 --- a/pdf_redactor.py +++ b/pdf_redactor.py @@ -376,7 +376,7 @@ def __str__(self): # __str__ is used for serialization if self.value == self.original_value: # If unchanged, return the raw original value without decoding/encoding. - return PdfString.from_bytes(self.raw_original_value) + return PdfString.from_bytes(self.raw_original_value, bytes_encoding='hex') else: # If the value changed, encode it from Unicode according to the encoding # of the font that is active at the location of this token. From 261717fb5c99bda38338531105582b7569a693a0 Mon Sep 17 00:00:00 2001 From: Ivan Chen Date: Mon, 26 Nov 2018 13:15:10 -0500 Subject: [PATCH 2/2] hex byte encoding for redacted texts also When working on text that should be redacted, the replacement should also be encoded as 'hex', because otherwise the replacement text might not show up. --- pdf_redactor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pdf_redactor.py b/pdf_redactor.py index 1027de3..abcee11 100644 --- a/pdf_redactor.py +++ b/pdf_redactor.py @@ -380,7 +380,7 @@ def __str__(self): else: # If the value changed, encode it from Unicode according to the encoding # of the font that is active at the location of this token. - return PdfString.from_bytes(fromUnicode(self.value, self.font, fontcache, options)) + return PdfString.from_bytes(fromUnicode(self.value, self.font, fontcache, options), bytes_encoding='hex') def __repr__(self): # __repr__ is used for debugging return "Token<%s>" % repr(self.value)