From a72d2e920ef18618a36a58714cd06d916b01a30b Mon Sep 17 00:00:00 2001 From: Tatevik Date: Thu, 11 Dec 2025 10:40:57 +0400 Subject: [PATCH 01/10] UserPersonalizer in CampaignProcessorMessageHandler --- .../CampaignProcessorMessageHandler.php | 4 ++++ .../Service/MessageProcessingPreparator.php | 7 +------ .../Service/RateLimitedCampaignMailer.php | 21 +++++++++++-------- .../CampaignProcessorMessageHandlerTest.php | 16 ++++++++++++++ .../MessageProcessingPreparatorTest.php | 10 --------- 5 files changed, 33 insertions(+), 25 deletions(-) diff --git a/src/Domain/Messaging/MessageHandler/CampaignProcessorMessageHandler.php b/src/Domain/Messaging/MessageHandler/CampaignProcessorMessageHandler.php index 12d45c30..f5154563 100644 --- a/src/Domain/Messaging/MessageHandler/CampaignProcessorMessageHandler.php +++ b/src/Domain/Messaging/MessageHandler/CampaignProcessorMessageHandler.php @@ -5,6 +5,7 @@ namespace PhpList\Core\Domain\Messaging\MessageHandler; use Doctrine\ORM\EntityManagerInterface; +use PhpList\Core\Domain\Configuration\Service\UserPersonalizer; use PhpList\Core\Domain\Messaging\Exception\MessageSizeLimitExceededException; use PhpList\Core\Domain\Messaging\Message\CampaignProcessorMessage; use PhpList\Core\Domain\Messaging\Message\SyncCampaignProcessorMessage; @@ -56,6 +57,7 @@ public function __construct( private readonly EventLogManager $eventLogManager, private readonly MessageDataManager $messageDataManager, private readonly MessagePrecacheService $precacheService, + private readonly UserPersonalizer $userPersonalizer, ?int $maxMailSize = null, ) { $this->maxMailSize = $maxMailSize ?? 0; @@ -159,6 +161,8 @@ private function handleEmailSending( Message\MessageContent $precachedContent, ): void { $processed = $this->messagePreparator->processMessageLinks($campaign->getId(), $precachedContent, $subscriber); + $processed->setText($this->userPersonalizer->personalize($processed->getText(), $subscriber->getEmail())); + $processed->setFooter($this->userPersonalizer->personalize($processed->getFooter(), $subscriber->getEmail())); try { $email = $this->mailer->composeEmail($campaign, $subscriber, $processed); diff --git a/src/Domain/Messaging/Service/MessageProcessingPreparator.php b/src/Domain/Messaging/Service/MessageProcessingPreparator.php index aee1d9e7..549a439e 100644 --- a/src/Domain/Messaging/Service/MessageProcessingPreparator.php +++ b/src/Domain/Messaging/Service/MessageProcessingPreparator.php @@ -5,8 +5,6 @@ namespace PhpList\Core\Domain\Messaging\Service; use PhpList\Core\Domain\Analytics\Service\LinkTrackService; -use PhpList\Core\Domain\Configuration\Service\UserPersonalizer; -use PhpList\Core\Domain\Messaging\Model\Message; use PhpList\Core\Domain\Messaging\Model\Message\MessageContent; use PhpList\Core\Domain\Messaging\Repository\MessageRepository; use PhpList\Core\Domain\Subscription\Model\Subscriber; @@ -24,7 +22,6 @@ public function __construct( private readonly MessageRepository $messageRepository, private readonly LinkTrackService $linkTrackService, private readonly TranslatorInterface $translator, - private readonly UserPersonalizer $userPersonalizer, ) { } @@ -78,16 +75,14 @@ public function processMessageLinks( $htmlText = $content->getText(); $footer = $content->getFooter(); - // todo: check other configured data that should be used in mail formatting/creation + // todo: check if getTextMessage should replace links as well if ($htmlText !== null) { $htmlText = $this->replaceLinks($savedLinks, $htmlText); - $htmlText = $this->userPersonalizer->personalize($htmlText, $subscriber->getEmail()); $content->setText($htmlText); } if ($footer !== null) { $footer = $this->replaceLinks($savedLinks, $footer); - $footer = $this->userPersonalizer->personalize($footer, $subscriber->getEmail()); $content->setFooter($footer); } diff --git a/src/Domain/Messaging/Service/RateLimitedCampaignMailer.php b/src/Domain/Messaging/Service/RateLimitedCampaignMailer.php index de2b73c1..97027d16 100644 --- a/src/Domain/Messaging/Service/RateLimitedCampaignMailer.php +++ b/src/Domain/Messaging/Service/RateLimitedCampaignMailer.php @@ -20,23 +20,26 @@ public function __construct(MailerInterface $mailer, SendRateLimiter $limiter) $this->limiter = $limiter; } - public function composeEmail(Message $processed, Subscriber $subscriber, Message\MessageContent $content): Email - { + public function composeEmail( + Message $message, + Subscriber $subscriber, + Message\MessageContent $processedContent, + ): Email { $email = new Email(); - if ($processed->getOptions()->getFromField() !== '') { - $email->from($processed->getOptions()->getFromField()); + if ($message->getOptions()->getFromField() !== '') { + $email->from($message->getOptions()->getFromField()); } - if ($processed->getOptions()->getReplyTo() !== '') { - $email->replyTo($processed->getOptions()->getReplyTo()); + if ($message->getOptions()->getReplyTo() !== '') { + $email->replyTo($message->getOptions()->getReplyTo()); } return $email ->to($subscriber->getEmail()) - ->subject($content->getSubject()) + ->subject($processedContent->getSubject()) // todo: check HTML2Text functionality - ->text($content->getTextMessage()) - ->html($content->getText()); + ->text($processedContent->getTextMessage()) + ->html($processedContent->getText()); } /** diff --git a/tests/Unit/Domain/Messaging/MessageHandler/CampaignProcessorMessageHandlerTest.php b/tests/Unit/Domain/Messaging/MessageHandler/CampaignProcessorMessageHandlerTest.php index a565f558..0e61d5dd 100644 --- a/tests/Unit/Domain/Messaging/MessageHandler/CampaignProcessorMessageHandlerTest.php +++ b/tests/Unit/Domain/Messaging/MessageHandler/CampaignProcessorMessageHandlerTest.php @@ -7,6 +7,7 @@ use Doctrine\ORM\EntityManagerInterface; use Exception; use PhpList\Core\Domain\Configuration\Service\Manager\EventLogManager; +use PhpList\Core\Domain\Configuration\Service\UserPersonalizer; use PhpList\Core\Domain\Messaging\Message\CampaignProcessorMessage; use PhpList\Core\Domain\Messaging\MessageHandler\CampaignProcessorMessageHandler; use PhpList\Core\Domain\Messaging\Model\Message; @@ -57,10 +58,18 @@ protected function setUp(): void $requeueHandler = $this->createMock(RequeueHandler::class); $this->translator = $this->createMock(Translator::class); $this->precacheService = $this->createMock(MessagePrecacheService::class); + $userPersonalizer = $this->createMock(UserPersonalizer::class); $timeLimiter->method('start'); $timeLimiter->method('shouldStop')->willReturn(false); + // Ensure personalization returns original text so assertions on replaced links remain valid + $userPersonalizer + ->method('personalize') + ->willReturnCallback(function (string $text) { + return $text; + }); + $this->handler = new CampaignProcessorMessageHandler( mailer: $this->mailer, entityManager: $this->entityManager, @@ -77,6 +86,7 @@ protected function setUp(): void eventLogManager: $this->createMock(EventLogManager::class), messageDataManager: $this->createMock(MessageDataManager::class), precacheService: $this->precacheService, + userPersonalizer: $userPersonalizer, maxMailSize: 0, ); } @@ -166,6 +176,8 @@ public function testInvokeWithValidSubscriberEmail(): void { $campaign = $this->createMock(Message::class); $content = $this->createContentMock(); + $content->method('getText')->willReturn('

Test HTML message

'); + $content->method('getFooter')->willReturn('

Test footer message

'); $campaign->method('getContent')->willReturn($content); $metadata = $this->createMock(MessageMetadata::class); $campaign->method('getMetadata')->willReturn($metadata); @@ -225,6 +237,8 @@ public function testInvokeWithMailerException(): void { $campaign = $this->createMock(Message::class); $content = $this->createContentMock(); + $content->method('getText')->willReturn('

Test HTML message

'); + $content->method('getFooter')->willReturn('

Test footer message

'); $metadata = $this->createMock(MessageMetadata::class); $campaign->method('getContent')->willReturn($content); $campaign->method('getMetadata')->willReturn($metadata); @@ -278,6 +292,8 @@ public function testInvokeWithMultipleSubscribers(): void { $campaign = $this->createCampaignMock(); $content = $this->createContentMock(); + $content->method('getText')->willReturn('

Test HTML message

'); + $content->method('getFooter')->willReturn('

Test footer message

'); $metadata = $this->createMock(MessageMetadata::class); $campaign->method('getMetadata')->willReturn($metadata); $campaign->method('getId')->willReturn(1); diff --git a/tests/Unit/Domain/Messaging/Service/MessageProcessingPreparatorTest.php b/tests/Unit/Domain/Messaging/Service/MessageProcessingPreparatorTest.php index b7530895..9ba29c46 100644 --- a/tests/Unit/Domain/Messaging/Service/MessageProcessingPreparatorTest.php +++ b/tests/Unit/Domain/Messaging/Service/MessageProcessingPreparatorTest.php @@ -6,7 +6,6 @@ use PhpList\Core\Domain\Analytics\Model\LinkTrack; use PhpList\Core\Domain\Analytics\Service\LinkTrackService; -use PhpList\Core\Domain\Configuration\Service\UserPersonalizer; use PhpList\Core\Domain\Messaging\Model\Message\MessageContent; use PhpList\Core\Domain\Messaging\Repository\MessageRepository; use PhpList\Core\Domain\Messaging\Service\MessageProcessingPreparator; @@ -23,7 +22,6 @@ class MessageProcessingPreparatorTest extends TestCase private SubscriberRepository&MockObject $subscriberRepository; private MessageRepository&MockObject $messageRepository; private LinkTrackService&MockObject $linkTrackService; - private UserPersonalizer&MockObject $userPersonalizer; private OutputInterface&MockObject $output; private MessageProcessingPreparator $preparator; @@ -32,13 +30,6 @@ protected function setUp(): void $this->subscriberRepository = $this->createMock(SubscriberRepository::class); $this->messageRepository = $this->createMock(MessageRepository::class); $this->linkTrackService = $this->createMock(LinkTrackService::class); - $this->userPersonalizer = $this->createMock(UserPersonalizer::class); - // Ensure personalization returns original text so assertions on replaced links remain valid - $this->userPersonalizer - ->method('personalize') - ->willReturnCallback(function (string $text) { - return $text; - }); $this->output = $this->createMock(OutputInterface::class); $this->preparator = new MessageProcessingPreparator( @@ -46,7 +37,6 @@ protected function setUp(): void messageRepository: $this->messageRepository, linkTrackService: $this->linkTrackService, translator: new Translator('en'), - userPersonalizer: $this->userPersonalizer, ); } From d40dedd67a6ff04416c355afee1da3bde0ef8e6e Mon Sep 17 00:00:00 2001 From: Tatevik Date: Thu, 11 Dec 2025 13:22:35 +0400 Subject: [PATCH 02/10] HtmlToText --- config/parameters.yml.dist | 4 + src/Domain/Messaging/Service/HtmlToText.php | 85 +++++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 src/Domain/Messaging/Service/HtmlToText.php diff --git a/config/parameters.yml.dist b/config/parameters.yml.dist index 41c9a20b..bc73fb60 100644 --- a/config/parameters.yml.dist +++ b/config/parameters.yml.dist @@ -89,3 +89,7 @@ parameters: env(MESSAGING_MAX_PROCESS_TIME): '600' messaging.max_mail_size: '%%env(MAX_MAILSIZE)%%' env(MAX_MAILSIZE): '209715200' + messaging.default_message_age: '%%env(DEFAULT_MESSAGEAGE)%%' + env(DEFAULT_MESSAGEAGE): '691200' + messaging.use_manual_text_part : '%%env(USE_MANUAL_TEXT_PART)%%' + env(USE_MANUAL_TEXT_PART): 0 diff --git a/src/Domain/Messaging/Service/HtmlToText.php b/src/Domain/Messaging/Service/HtmlToText.php new file mode 100644 index 00000000..a6775288 --- /dev/null +++ b/src/Domain/Messaging/Service/HtmlToText.php @@ -0,0 +1,85 @@ +]*>(.*?)<\/script\s*>/is", '', $text); + $text = preg_replace("/]*>(.*?)<\/style\s*>/is", '', $text); + + $text = preg_replace( + "/]*href=([\"\'])(.*)\\1[^>]*>(.*)<\/a>/Umis", + "[URLTEXT]\\3[ENDURLTEXT][LINK]\\2[ENDLINK]\n", + $text + ); + $text = preg_replace("/(.*?)<\/b\s*>/is", '*\\1*', $text); + $text = preg_replace("/(.*?)<\/h[\d]\s*>/is", "**\\1**\n", $text); + $text = preg_replace("/(.*?)<\/i\s*>/is", '/\\1/', $text); + $text = preg_replace("/<\/tr\s*?>/i", "<\/tr>\n\n", $text); + $text = preg_replace("/<\/p\s*?>/i", "<\/p>\n\n", $text); + $text = preg_replace('/]*?>/i', "
\n", $text); + $text = preg_replace("/]*?\/>/i", "\n", $text); + $text = preg_replace('/ $fullmatch) { + $linktext = $links[1][$matchindex]; + $linkurl = $links[2][$matchindex]; + // check if the text linked is a repetition of the URL + if (trim($linktext) == trim($linkurl) || + 'https://'.trim($linktext) == trim($linkurl) || + 'http://'.trim($linktext) == trim($linkurl) + ) { + $linkreplace = $linkurl; + } else { + //# if link is an anchor only, take it out + if (strpos($linkurl, '#') === 0) { + $linkreplace = $linktext; + } else { + $linkreplace = $linktext.' <'.$linkurl.'>'; + } + } + $text = str_replace($fullmatch, $linkreplace, $text); + } + $text = preg_replace( + "/]*>(.*?)<\/a>/is", + '[URLTEXT]\\2[ENDURLTEXT][LINK]\\1[ENDLINK]', + $text, + 500 + ); + + $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8'); + + $text = preg_replace('/###NL###/', "\n", $text); + $text = preg_replace("/\n /", "\n", $text); + $text = preg_replace("/\t/", ' ', $text); + + // reduce whitespace + while (preg_match('/ /', $text)) { + $text = preg_replace('/ /', ' ', $text); + } + while (preg_match("/\n\s*\n\s*\n/", $text)) { + $text = preg_replace("/\n\s*\n\s*\n/", "\n\n", $text); + } + $ww = $this->configProvider->getValue(ConfigOption::WordWrap) ?? self::WORD_WRAP; + + return wordwrap($text, $ww); + } +} From 613a1960322572647345acec00a2c387f4349480 Mon Sep 17 00:00:00 2001 From: Tatevik Date: Thu, 11 Dec 2025 13:26:57 +0400 Subject: [PATCH 03/10] MessageDataLoader --- .../Configuration/Model/ConfigOption.php | 9 + .../Repository/MessageDataRepository.php | 6 + .../Messaging/Service/MessageDataLoader.php | 201 ++++++++++++++++++ 3 files changed, 216 insertions(+) create mode 100644 src/Domain/Messaging/Service/MessageDataLoader.php diff --git a/src/Domain/Configuration/Model/ConfigOption.php b/src/Domain/Configuration/Model/ConfigOption.php index 86b9286e..fa2066a8 100644 --- a/src/Domain/Configuration/Model/ConfigOption.php +++ b/src/Domain/Configuration/Model/ConfigOption.php @@ -15,4 +15,13 @@ enum ConfigOption: string case SubscribeUrl = 'subscribeurl'; case Domain = 'domain'; case Website = 'website'; + case MessageFromAddress = 'message_from_address'; + case AlwaysAddGoogleTracking = 'always_add_googletracking'; + case AdminAddress = 'admin_address'; + case DefaultMessageTemplate = 'defaultmessagetemplate'; + case MessageFooter = 'messagefooter'; + case ForwardFooter = 'forwardfooter'; + case NotifyStartDefault = 'notifystart_default'; + case NotifyEndDefault = 'notifyend_default'; + case WordWrap = 'wordwrap'; } diff --git a/src/Domain/Messaging/Repository/MessageDataRepository.php b/src/Domain/Messaging/Repository/MessageDataRepository.php index 51b27a04..1ef70d39 100644 --- a/src/Domain/Messaging/Repository/MessageDataRepository.php +++ b/src/Domain/Messaging/Repository/MessageDataRepository.php @@ -17,4 +17,10 @@ public function findByIdAndName(int $messageId, string $name): ?MessageData { return $this->findOneBy(['id' => $messageId, 'name' => $name]); } + + /** @return MessageData[] */ + public function getForMessage(int $messageId): array + { + return $this->findBy(['id' => $messageId]); + } } diff --git a/src/Domain/Messaging/Service/MessageDataLoader.php b/src/Domain/Messaging/Service/MessageDataLoader.php new file mode 100644 index 00000000..0f899d28 --- /dev/null +++ b/src/Domain/Messaging/Service/MessageDataLoader.php @@ -0,0 +1,201 @@ +configProvider->getValue(ConfigOption::MessageFromAddress) + ?? $this->configProvider->getValue(ConfigOption::AdminAddress); + + $finishSending = time() + $this->defaultMessageAge; + + $messageData = [ + 'template' => $this->configProvider->getValue(ConfigOption::DefaultMessageTemplate), + 'sendformat' => 'HTML', + 'message' => '', + 'forwardmessage' => '', + 'textmessage' => '', + 'rsstemplate' => '', + 'embargo' => [ + 'year' => date('Y'), + 'month' => date('m'), + 'day' => date('d'), + 'hour' => date('H'), + 'minute' => date('i'), + ], + 'repeatinterval' => 0, + 'repeatuntil' => [ + 'year' => date('Y'), + 'month' => date('m'), + 'day' => date('d'), + 'hour' => date('H'), + 'minute' => date('i'), + ], + 'requeueinterval' => 0, + 'requeueuntil' => [ + 'year' => date('Y'), + 'month' => date('m'), + 'day' => date('d'), + 'hour' => date('H'), + 'minute' => date('i'), + ], + 'finishsending' => [ + 'year' => date('Y', $finishSending), + 'month' => date('m', $finishSending), + 'day' => date('d', $finishSending), + 'hour' => date('H', $finishSending), + 'minute' => date('i', $finishSending), + ], + 'fromfield' => '', + 'subject' => '', + 'forwardsubject' => '', + 'footer' => $this->configProvider->getValue(ConfigOption::MessageFooter), + 'forwardfooter' => $this->configProvider->getValue(ConfigOption::ForwardFooter), + 'status' => '', + 'tofield' => '', + 'replyto' => '', + 'targetlist' => [], + 'criteria_match' => '', + 'sendurl' => '', + 'sendmethod' => 'inputhere', + 'testtarget' => '', + 'notify_start' => $this->configProvider->getValue(ConfigOption::NotifyStartDefault), + 'notify_end' => $this->configProvider->getValue(ConfigOption::NotifyEndDefault), + 'google_track' => filter_var( + value: $this->configProvider->getValue(ConfigOption::AlwaysAddGoogleTracking), + filter: FILTER_VALIDATE_BOOL + ), + 'excludelist' => [], + 'sentastest' => 0, + ]; + // todo: set correct values from entity + $nonEmptyFields = array_filter( + get_object_vars($message), + fn($v) => $v !== null && $v !== '', + ); + foreach ($nonEmptyFields as $key => $val) { + $messageData[$key] = $val; + } + + $messageData['subject'] = $messageData['subject'] === '(no title)' ? '(no subject)' : $messageData['subject']; + + $storedMessageData = $this->messageDataRepository->getForMessage($message->getId()); + foreach ($storedMessageData as $storedMessageDatum) { + if (str_starts_with($storedMessageDatum->getData(), 'SER:')) { + $unserialized = unserialize(substr($storedMessageDatum->getData(), 4)); + $data = array_walk_recursive($unserialized, 'stripslashes'); + } else { + $data = stripslashes($storedMessageDatum->getData()); + } + if (!in_array($storedMessageDatum->getName(), ['astext', 'ashtml', 'astextandhtml', 'aspdf', 'astextandpdf'])) + { + //# don't overwrite counters in the message table from the data table + $messageData[stripslashes($storedMessageDatum->getName())] = $data; + } + } + + foreach (array('embargo', 'repeatuntil', 'requeueuntil') as $dateField) { + if (!is_array($messageData[$dateField])) { + $messageData[$dateField] = [ + 'year' => date('Y'), + 'month' => date('m'), + 'day' => date('d'), + 'hour' => date('H'), + 'minute' => date('i'), + ]; + } + } + + foreach($message->getListMessages() as $listMessage) { + $messageData['targetlist'][$listMessage->getListId()] = 1; + } + + //# backwards, check that the content has a url and use it to fill the sendurl + if (empty($messageData['sendurl'])) { + //# can't do "ungreedy matching, in case the URL has placeholders, but this can potentially throw problems + if (!empty($messageData['message']) && preg_match('/\[URL:(.*)\]/i', $messageData['message'], $regs)) { + $messageData['sendurl'] = $regs[1]; + } + } + if (empty($messageData['sendurl']) && !empty($messageData['message'])) { + // if there's a message and no url, make sure to show the editor, and not the URL input + $messageData['sendmethod'] = 'inputhere'; + } + + //## parse the from field into it's components - email and name + if (preg_match('/([^ ]+@[^ ]+)/', $messageData['fromfield'], $regs)) { + // if there is an email in the from, rewrite it as "name " + $messageData['fromname'] = str_replace($regs[0], '', $messageData['fromfield']); + $messageData['fromemail'] = $regs[0]; + // if the email has < and > take them out here + $messageData['fromemail'] = str_replace('<', '', $messageData['fromemail']); + $messageData['fromemail'] = str_replace('>', '', $messageData['fromemail']); + // make sure there are no quotes around the name + $messageData['fromname'] = str_replace('"', '', ltrim(rtrim($messageData['fromname']))); + } elseif (strpos($messageData['fromfield'], ' ')) { + // if there is a space, we need to add the email + $messageData['fromname'] = $messageData['fromfield']; + // $cached[$messageid]["fromemail"] = "listmaster@$domain"; + $messageData['fromemail'] = $defaultFrom; + } else { + $messageData['fromemail'] = $defaultFrom; + $messageData['fromname'] = $messageData['fromfield']; + } + // disallow an email address in the name + if (preg_match('/([^ ]+@[^ ]+)/', $messageData['fromname'], $regs)) { + $messageData['fromname'] = str_replace($regs[0], '', $messageData['fromname']); + } + // clean up + $messageData['fromemail'] = str_replace(',', '', $messageData['fromemail']); + $messageData['fromname'] = str_replace(',', '', $messageData['fromname']); + + $messageData['fromname'] = trim($messageData['fromname']); + + // erase double spacing + while (strpos($messageData['fromname'], ' ')) { + $messageData['fromname'] = str_replace(' ', ' ', $messageData['fromname']); + } + + //# if the name ends up being empty, copy the email + if (empty($messageData['fromname'])) { + $messageData['fromname'] = $messageData['fromemail']; + } + + if (isset($messageData['targetlist']['unselect'])) { + unset($messageData['targetlist']['unselect']); + } + if (isset($messageData['excludelist']['unselect'])) { + unset($messageData['excludelist']['unselect']); + } + + if (empty($messageData['campaigntitle'])) { + if ($messageData['subject'] != '(no subject)') { + $messageData['campaigntitle'] = $messageData['subject']; + } else { + $messageData['campaigntitle'] = '(no title)'; + } + } + //# copy subject to title + if ($messageData['campaigntitle'] == '(no title)' && $messageData['subject'] != '(no subject)') { + $messageData['campaigntitle'] = $messageData['subject']; + } + + return $messageData; + } +} From 759d8e062c2be4930c14ce6cae9ff6362155fd5e Mon Sep 17 00:00:00 2001 From: Tatevik Date: Thu, 11 Dec 2025 13:28:20 +0400 Subject: [PATCH 04/10] TextParser --- src/Domain/Messaging/Service/TextParser.php | 73 +++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 src/Domain/Messaging/Service/TextParser.php diff --git a/src/Domain/Messaging/Service/TextParser.php b/src/Domain/Messaging/Service/TextParser.php new file mode 100644 index 00000000..da0a3c83 --- /dev/null +++ b/src/Domain/Messaging/Service/TextParser.php @@ -0,0 +1,73 @@ +\\1', $text); + $link_pattern = "/(.*)(.*?)<\s*\/a\s*>(.*)/is"; + + $i = 0; + while (preg_match($link_pattern, $text, $matches)) { + $url = $matches[2]; + $rest = $matches[3]; + if (!preg_match('/^(http:)|(mailto:)|(ftp:)|(https:)/i', $url)) { + // avoid this + // + $url = preg_replace('/:/', '', $url); + } + $link[$i] = ''.$matches[4].''; + $text = $matches[1]."%%$i%%".$matches[5]; + ++$i; + } + + $text = preg_replace("/(www\.[a-zA-Z0-9\.\/#~:?+=&%@!_\\-]+)/i", 'http://\\1', $text); //make www. -> http://www. + $text = preg_replace("/(https?:\/\/)http?:\/\//i", '\\1', $text); //take out duplicate schema + $text = preg_replace("/(ftp:\/\/)http?:\/\//i", '\\1', $text); //take out duplicate schema + $text = preg_replace("/(https?:\/\/)(?!www)([a-zA-Z0-9\.\/#~:?+=&%@!_\\-]+)/i", + '\\2', + $text); //eg-- http://kernel.org -> http://kernel.org + + $text = preg_replace("/(https?:\/\/)(www\.)([a-zA-Z0-9\.\/#~:?+=&%@!\\-_]+)/i", + '\\2\\3', + $text); //eg -- http://www.google.com -> www.google.com + + // take off a possible last full stop and move it outside + $text = preg_replace("/(.*)\.<\/a>/i", + '\\2.', $text); + + for ($j = 0; $j < $i; ++$j) { + $replacement = $link[$j]; + $text = preg_replace("/\%\%$j\%\%/", $replacement, $text); + } + + // hmm, regular expression choke on some characters in the text + // first replace all the brackets with placeholders. + // we cannot use htmlspecialchars or addslashes, because some are needed + + $text = str_replace("\(", '', $text); + $text = str_replace("\)", '', $text); + $text = preg_replace('/\$/', '', $text); + + // @@@ to be xhtml compabible we'd have to close the

as well + // so for now, just make it two br/s, which will be done by replacing + // \n with
+ // $paragraph = '

'; + $br = '
'; + $text = preg_replace("/\r/", '', $text); + $text = preg_replace("/\n/", "$br\n", $text); + + // reverse our previous placeholders + $text = str_replace('', '(', $text); + $text = str_replace('', ')', $text); + return str_replace('', '$', $text); + } +} From d94f825ce115eaae5ddd75f654551f15a88c44dd Mon Sep 17 00:00:00 2001 From: Tatevik Date: Sat, 13 Dec 2025 18:39:49 +0400 Subject: [PATCH 05/10] RemotePageFetcher --- composer.json | 3 +- .../Service => Common}/HtmlToText.php | 2 +- src/Domain/Common/HtmlUrlRewriter.php | 190 +++++++++++++++++ src/Domain/Common/RemotePageFetcher.php | 121 +++++++++++ .../Service => Common}/TextParser.php | 2 +- .../Configuration/Model/ConfigOption.php | 1 + .../Repository/UrlCacheRepository.php | 18 ++ .../CampaignProcessorMessageHandler.php | 5 + .../Repository/TemplateRepository.php | 6 + .../Service/MessagePrecacheService.php | 193 +++++++++++++++++- 10 files changed, 535 insertions(+), 6 deletions(-) rename src/Domain/{Messaging/Service => Common}/HtmlToText.php (98%) create mode 100644 src/Domain/Common/HtmlUrlRewriter.php create mode 100644 src/Domain/Common/RemotePageFetcher.php rename src/Domain/{Messaging/Service => Common}/TextParser.php (98%) diff --git a/composer.json b/composer.json index e696c132..ca914980 100644 --- a/composer.json +++ b/composer.json @@ -79,7 +79,8 @@ "ext-imap": "*", "tatevikgr/rss-feed": "dev-main", "ext-pdo": "*", - "ezyang/htmlpurifier": "^4.19" + "ezyang/htmlpurifier": "^4.19", + "ext-libxml": "*" }, "require-dev": { "phpunit/phpunit": "^9.5", diff --git a/src/Domain/Messaging/Service/HtmlToText.php b/src/Domain/Common/HtmlToText.php similarity index 98% rename from src/Domain/Messaging/Service/HtmlToText.php rename to src/Domain/Common/HtmlToText.php index a6775288..d426ce41 100644 --- a/src/Domain/Messaging/Service/HtmlToText.php +++ b/src/Domain/Common/HtmlToText.php @@ -2,7 +2,7 @@ declare(strict_types=1); -namespace PhpList\Core\Domain\Messaging\Service; +namespace PhpList\Core\Domain\Common; use PhpList\Core\Domain\Configuration\Model\ConfigOption; use PhpList\Core\Domain\Configuration\Service\Provider\ConfigProvider; diff --git a/src/Domain/Common/HtmlUrlRewriter.php b/src/Domain/Common/HtmlUrlRewriter.php new file mode 100644 index 00000000..bba0b46e --- /dev/null +++ b/src/Domain/Common/HtmlUrlRewriter.php @@ -0,0 +1,190 @@ +

' . $html . '
'; + $dom->loadHTML($wrapped, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD); + + $xpath = new DOMXPath($dom); + + // Attributes to rewrite + $attrMap = [ + '//*[@src]' => 'src', + '//*[@href]' => 'href', + '//*[@action]' => 'action', + '//*[@background]' => 'background', + ]; + + foreach ($attrMap as $query => $attr) { + foreach ($xpath->query($query) as $node) { + /** @var DOMElement $node */ + $val = $node->getAttribute($attr); + $node->setAttribute($attr, $this->absolutizeUrl($val, $baseUrl)); + } + } + + // srcset needs special handling (multiple candidates) + foreach ($xpath->query('//*[@srcset]') as $node) { + /** @var DOMElement $node */ + $node->setAttribute('srcset', $this->rewriteSrcset($node->getAttribute('srcset'), $baseUrl)); + } + + // 2) Rewrite inline