From 03ed8fcc23ff0b54f7216827e2dde1bcc9d94390 Mon Sep 17 00:00:00 2001 From: Andreas Fiehn Date: Mon, 3 Nov 2025 19:07:52 +0100 Subject: [PATCH 1/2] Fix comma in encoded address header (issue 146), add test for it and fix issue136 test --- src/mailparser/core.py | 18 ++++++++++-------- tests/mails/mail_test_18 | 14 ++++++++++++++ tests/test_mail_parser.py | 19 +++++++++++++++---- 3 files changed, 39 insertions(+), 12 deletions(-) create mode 100644 tests/mails/mail_test_18 diff --git a/src/mailparser/core.py b/src/mailparser/core.py index c5e8658..225e1ca 100644 --- a/src/mailparser/core.py +++ b/src/mailparser/core.py @@ -554,15 +554,17 @@ def __getattr__(self, name): raw = self.message.get_all(name) return json.dumps(raw, ensure_ascii=False) - # object headers + # object headers elif name_header in ADDRESSES_HEADERS: - h = decode_header_part(self.message.get(name_header, str())) - h_parsed = email.utils.getaddresses([h], strict=True) - return ( - h_parsed - if h_parsed != [("", "")] - else email.utils.getaddresses([h], strict=False) - ) + raw_header = self.message.get(name_header, "") + # parse before decoding + parsed_addresses = email.utils.getaddresses([raw_header], strict=True) + + # decoded addresses + return [ + (("" if (decoded_name := decode_header_part(name)) == email_addr else decoded_name), email_addr) + for name, email_addr in parsed_addresses + ] # others headers else: diff --git a/tests/mails/mail_test_18 b/tests/mails/mail_test_18 new file mode 100644 index 0000000..9c74f2d --- /dev/null +++ b/tests/mails/mail_test_18 @@ -0,0 +1,14 @@ +MIME-Version: 1.0 +Date: Mon, 3 Nov 2025 18:23:00 +0100 +Content-Type: text/plain; charset=utf-8 +Subject: Test for Comma and Name Bugs +From: =?iso-8859-1?Q?Last=DFlName=2C_FirstName?= <comma.name@example.com> +To: =?UTF-8?B?dG9ueS5zdGFya0BleGFtcGxlLmNvbQ==?= <tony.stark@example.com> +Cc: simple@example.net, =?UTF-8?Q?John_=22Johnny=22_Doe?= <john.doe@example.com> + +This is a test email body. + +It validates two specific fixes: +1. 
The 'From' header contains a comma in the encoded display name. +2. The 'To' header has an encoded display name that matches the email address. +3. The 'Cc' header has a mix of simple and encoded addresses. \ No newline at end of file diff --git a/tests/test_mail_parser.py b/tests/test_mail_parser.py index 5fbab27..29c3606 100644 --- a/tests/test_mail_parser.py +++ b/tests/test_mail_parser.py @@ -61,6 +61,7 @@ mail_test_15 = os.path.join(base_path, "mails", "mail_test_15") mail_test_16 = os.path.join(base_path, "mails", "mail_test_16") mail_test_17 = os.path.join(base_path, "mails", "mail_test_17") +mail_test_18 = os.path.join(base_path, "mails", "mail_test_18") mail_malformed_1 = os.path.join(base_path, "mails", "mail_malformed_1") mail_malformed_2 = os.path.join(base_path, "mails", "mail_malformed_2") mail_malformed_3 = os.path.join(base_path, "mails", "mail_malformed_3") @@ -698,10 +699,7 @@ def test_issue_139(self): def test_issue_136(self): mail = mailparser.parse_from_file(mail_test_17) - assert mail.from_ == [ - ("", "notificaccion-clientes@bbva.mx"), - ("", "notificaccion-clientes@bbva.mx"), - ] + assert mail.from_ == [("", "notificaccion-clientes@bbva.mx"),] def test_str_method_with_message(self): """Test __str__ method returns subject when message exists""" @@ -938,3 +936,16 @@ def test_text_plain_8bit_encoding(self): mail = mailparser.parse_from_string(raw_mail) self.assertIn("This is plain text", mail.body) + + + def test_comma_in_name(self): + """ + Tests the fixes for both the 'comma-in-encoded-name' issue and the + 'encoded-name-equals-email' issue (from test_issue_136). 
+ """ + + mail = mailparser.parse_from_file(mail_test_18) + + assert mail.from_ == [('LastßlName, FirstName', 'comma.name@example.com')] + assert mail.to == [('', 'tony.stark@example.com')] + assert mail.cc == [('', 'simple@example.net'), ('John "Johnny" Doe', 'john.doe@example.com')] \ No newline at end of file From ae027716b4cd0a3b6fa694f033a81dff3f5e33a7 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Thu, 6 Nov 2025 22:14:40 +0100 Subject: [PATCH 2/2] pre-commit: improve formatting and readability in MailParser and tests --- src/mailparser/core.py | 11 +++++++++-- tests/mails/mail_test_18 | 2 +- tests/test_mail_parser.py | 14 +++++++++----- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/mailparser/core.py b/src/mailparser/core.py index 225e1ca..9f07507 100644 --- a/src/mailparser/core.py +++ b/src/mailparser/core.py @@ -554,7 +554,7 @@ def __getattr__(self, name): raw = self.message.get_all(name) return json.dumps(raw, ensure_ascii=False) - # object headers + # object headers elif name_header in ADDRESSES_HEADERS: raw_header = self.message.get(name_header, "") # parse before decoding @@ -562,7 +562,14 @@ def __getattr__(self, name): # decoded addresses return [ - (("" if (decoded_name := decode_header_part(name)) == email_addr else decoded_name), email_addr) + ( + ( + "" + if (decoded_name := decode_header_part(name)) == email_addr + else decoded_name + ), + email_addr, + ) for name, email_addr in parsed_addresses ] diff --git a/tests/mails/mail_test_18 b/tests/mails/mail_test_18 index 9c74f2d..3b979ab 100644 --- a/tests/mails/mail_test_18 +++ b/tests/mails/mail_test_18 @@ -11,4 +11,4 @@ This is a test email body. It validates two specific fixes: 1. The 'From' header contains a comma in the encoded display name. 2. The 'To' header has an encoded display name that matches the email address. -3. The 'Cc' header has a mix of simple and encoded addresses. \ No newline at end of file +3. 
The 'Cc' header has a mix of simple and encoded addresses. diff --git a/tests/test_mail_parser.py b/tests/test_mail_parser.py index 29c3606..8c38672 100644 --- a/tests/test_mail_parser.py +++ b/tests/test_mail_parser.py @@ -699,7 +699,9 @@ def test_issue_139(self): def test_issue_136(self): mail = mailparser.parse_from_file(mail_test_17) - assert mail.from_ == [("", "notificaccion-clientes@bbva.mx"),] + assert mail.from_ == [ + ("", "notificaccion-clientes@bbva.mx"), + ] def test_str_method_with_message(self): """Test __str__ method returns subject when message exists""" @@ -937,7 +939,6 @@ def test_text_plain_8bit_encoding(self): mail = mailparser.parse_from_string(raw_mail) self.assertIn("This is plain text", mail.body) - def test_comma_in_name(self): """ Tests the fixes for both the 'comma-in-encoded-name' issue and the @@ -946,6 +947,9 @@ def test_comma_in_name(self): mail = mailparser.parse_from_file(mail_test_18) - assert mail.from_ == [('LastßlName, FirstName', 'comma.name@example.com')] - assert mail.to == [('', 'tony.stark@example.com')] - assert mail.cc == [('', 'simple@example.net'), ('John "Johnny" Doe', 'john.doe@example.com')] \ No newline at end of file + assert mail.from_ == [("LastßlName, FirstName", "comma.name@example.com")] + assert mail.to == [("", "tony.stark@example.com")] + assert mail.cc == [ + ("", "simple@example.net"), + ('John "Johnny" Doe', "john.doe@example.com"), + ]