Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@
"ext-imap": "*",
"tatevikgr/rss-feed": "dev-main",
"ext-pdo": "*",
"ezyang/htmlpurifier": "^4.19"
"ezyang/htmlpurifier": "^4.19",
"ext-libxml": "*",
"ext-gd": "*"
},
"require-dev": {
"phpunit/phpunit": "^9.5",
Expand Down
9 changes: 9 additions & 0 deletions config/parameters.yml.dist
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,12 @@ parameters:
env(MESSAGING_MAX_PROCESS_TIME): '600'
messaging.max_mail_size: '%%env(MAX_MAILSIZE)%%'
env(MAX_MAILSIZE): '209715200'
messaging.default_message_age: '%%env(DEFAULT_MESSAGEAGE)%%'
env(DEFAULT_MESSAGEAGE): '691200'
messaging.use_manual_text_part : '%%env(USE_MANUAL_TEXT_PART)%%'
env(USE_MANUAL_TEXT_PART): 0

phplist.upload_images_dir: '%%env(PHPLIST_UPLOADIMAGES_DIR)%%'
env(PHPLIST_UPLOADIMAGES_DIR): 'images'
phplist.public_schema: '%%env(PUBLIC_SCHEMA)%%'
env(PUBLIC_SCHEMA): 'http'
Comment on lines +92 to +100
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Minor formatting inconsistency.

Line 94 has a space before the colon: messaging.use_manual_text_part : while all other parameter keys follow the pattern key: with no space.

Apply this diff for consistency:

-    messaging.use_manual_text_part : '%%env(USE_MANUAL_TEXT_PART)%%'
+    messaging.use_manual_text_part: '%%env(USE_MANUAL_TEXT_PART)%%'
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
messaging.default_message_age: '%%env(DEFAULT_MESSAGEAGE)%%'
env(DEFAULT_MESSAGEAGE): '691200'
messaging.use_manual_text_part : '%%env(USE_MANUAL_TEXT_PART)%%'
env(USE_MANUAL_TEXT_PART): 0
phplist.upload_images_dir: '%%env(PHPLIST_UPLOADIMAGES_DIR)%%'
env(PHPLIST_UPLOADIMAGES_DIR): 'images'
phplist.public_schema: '%%env(PUBLIC_SCHEMA)%%'
env(PUBLIC_SCHEMA): 'http'
messaging.default_message_age: '%%env(DEFAULT_MESSAGEAGE)%%'
env(DEFAULT_MESSAGEAGE): '691200'
messaging.use_manual_text_part: '%%env(USE_MANUAL_TEXT_PART)%%'
env(USE_MANUAL_TEXT_PART): 0
phplist.upload_images_dir: '%%env(PHPLIST_UPLOADIMAGES_DIR)%%'
env(PHPLIST_UPLOADIMAGES_DIR): 'images'
phplist.public_schema: '%%env(PUBLIC_SCHEMA)%%'
env(PUBLIC_SCHEMA): 'http'
🤖 Prompt for AI Agents
In config/parameters.yml.dist around lines 92 to 100, there's a formatting
inconsistency at line 94 where the key "messaging.use_manual_text_part :" has an
extra space before the colon; update that line to remove the space so the key
follows the same "key:" pattern as the other parameters, keeping the existing
value/token unchanged and preserving surrounding indentation and newline
structure.

85 changes: 85 additions & 0 deletions src/Domain/Common/HtmlToText.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
<?php

declare(strict_types=1);

namespace PhpList\Core\Domain\Common;

use PhpList\Core\Domain\Configuration\Model\ConfigOption;
use PhpList\Core\Domain\Configuration\Service\Provider\ConfigProvider;

/**
 * Converts an HTML fragment into a plain-text approximation.
 *
 * Scripts and styles are dropped, links are rendered as "text <url>",
 * bold/italic/headings get lightweight ASCII markers, remaining tags are
 * stripped, entities are decoded, whitespace is collapsed and the result is
 * word-wrapped at the configured column.
 */
class HtmlToText
{
    /** Wrap column used when no usable word-wrap value is configured. */
    private const WORD_WRAP = 70;

    public function __construct(private readonly ConfigProvider $configProvider)
    {
    }

    /**
     * @param string $html HTML markup to convert.
     *
     * @return string Plain text, wrapped with wordwrap() at the configured width.
     */
    public function __invoke(string $html): string
    {
        $text = preg_replace("/\r/", '', $html);

        // Drop blocks whose content must never reach the text version.
        $text = preg_replace("/<script[^>]*>(.*?)<\/script\s*>/is", '', $text);
        $text = preg_replace("/<style[^>]*>(.*?)<\/style\s*>/is", '', $text);

        // Park links behind bracket markers so they survive strip_tags() below.
        $text = preg_replace(
            "/<a[^>]*href=([\"\'])(.*)\\1[^>]*>(.*)<\/a>/Umis",
            "[URLTEXT]\\3[ENDURLTEXT][LINK]\\2[ENDLINK]\n",
            $text
        );
        $text = preg_replace("/<b>(.*?)<\/b\s*>/is", '*\\1*', $text);
        $text = preg_replace("/<h[\d]>(.*?)<\/h[\d]\s*>/is", "**\\1**\n", $text);
        $text = preg_replace("/<i>(.*?)<\/i\s*>/is", '/\\1/', $text);
        // Add line breaks after block-level closers; the tags themselves are stripped afterwards.
        $text = preg_replace("/<\/tr\s*?>/i", "<\/tr>\n\n", $text);
        $text = preg_replace("/<\/p\s*?>/i", "<\/p>\n\n", $text);
        $text = preg_replace('/<br[^>]*?>/i', "<br>\n", $text);
        $text = preg_replace("/<br[^>]*?\/>/i", "<br\/>\n", $text);
        $text = preg_replace('/<table/i', "\n\n<table", $text);
        $text = strip_tags($text);

        // Find all parked links and render them as plain text.
        preg_match_all('~\[URLTEXT\](.*)\[ENDURLTEXT\]\[LINK\](.*)\[ENDLINK\]~Umis', $text, $links);
        foreach ($links[0] as $matchIndex => $fullMatch) {
            $linkText = $links[1][$matchIndex];
            $linkUrl = $links[2][$matchIndex];
            $trimmedText = trim($linkText);
            $trimmedUrl = trim($linkUrl);

            // Strict comparison: both sides are strings and must not be compared numerically.
            $textRepeatsUrl = $trimmedText === $trimmedUrl
                || 'https://' . $trimmedText === $trimmedUrl
                || 'http://' . $trimmedText === $trimmedUrl;

            if ($textRepeatsUrl) {
                // The visible text is just the URL again: print it once.
                $linkReplace = $linkUrl;
            } elseif (str_starts_with($linkUrl, '#')) {
                // In-page anchors are meaningless in plain text: keep the label only.
                $linkReplace = $linkText;
            } else {
                $linkReplace = $linkText . ' <' . $linkUrl . '>';
            }
            $text = str_replace($fullMatch, $linkReplace, $text);
        }

        // NOTE(review): strip_tags() has already run, so this pass can only hit
        // anchors that survived stripping - confirm whether it is still needed.
        $text = preg_replace(
            "/<a href=[\"\'](.*?)[\"\'][^>]*>(.*?)<\/a>/is",
            '[URLTEXT]\\2[ENDURLTEXT][LINK]\\1[ENDLINK]',
            $text,
            500
        );

        $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');

        // "###NL###" is turned into a real newline (presumably a placeholder set upstream - verify).
        $text = preg_replace('/###NL###/', "\n", $text);
        $text = preg_replace("/\n /", "\n", $text);
        $text = preg_replace("/\t/", ' ', $text);

        // Reduce whitespace: collapse every run of spaces to one space in a single pass.
        // (Matching a single space and replacing it with a single space never terminates.)
        $text = preg_replace('/ {2,}/', ' ', $text);
        while (preg_match("/\n\s*\n\s*\n/", $text)) {
            $text = preg_replace("/\n\s*\n\s*\n/", "\n\n", $text);
        }

        // The config value is reported to be a nullable string; with strict_types
        // enabled wordwrap() needs a real int, so convert it explicitly and fall
        // back to the default for missing, non-numeric or non-positive values.
        $configured = $this->configProvider->getValue(ConfigOption::WordWrap);
        $width = is_numeric($configured) ? (int) $configured : self::WORD_WRAP;
        if ($width < 1) {
            $width = self::WORD_WRAP;
        }

        return wordwrap($text, $width);
    }
}
190 changes: 190 additions & 0 deletions src/Domain/Common/HtmlUrlRewriter.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
<?php

declare(strict_types=1);

namespace PhpList\Core\Domain\Common;

use DOMDocument;
use DOMElement;
use DOMXPath;

/**
 * Rewrites relative URLs inside an HTML fragment into absolute URLs.
 *
 * Covers src/href/action/background attributes, srcset candidate lists,
 * <style> blocks and inline style="" attributes (url(...) and @import).
 */
class HtmlUrlRewriter
{
    /**
     * Make every resource reference in $html absolute against $baseUrl.
     *
     * @param string $html    HTML fragment (not necessarily a full document).
     * @param string $baseUrl Absolute URL that relative references are resolved against.
     *
     * @return string The rewritten fragment; the input is returned unchanged
     *                if the fragment cannot be located again after parsing.
     */
    public function addAbsoluteResources(string $html, string $baseUrl): string
    {
        // NOTE(review): stripping the trailing slash turns a directory base such as
        // "https://example.com/lists/" into ".../lists", which absolutizeUrl() then
        // treats as a file and resolves against "/". Kept as-is for compatibility;
        // confirm whether callers rely on this.
        $baseUrl = rtrim($baseUrl, "/");

        $dom = new DOMDocument();
        // Collect parser warnings internally; remember the previous mode so this
        // process-wide libxml setting is not left changed for other code.
        $previousErrorMode = libxml_use_internal_errors(true);

        try {
            // Wrap the fragment in a known container so it can be extracted again.
            $wrapped = '<!doctype html><meta charset="utf-8"><div id="__wrap__">' . $html . '</div>';
            $dom->loadHTML($wrapped, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);

            $xpath = new DOMXPath($dom);

            // 1) Plain URL attributes.
            $attrMap = [
                '//*[@src]' => 'src',
                '//*[@href]' => 'href',
                '//*[@action]' => 'action',
                '//*[@background]' => 'background',
            ];

            foreach ($attrMap as $query => $attr) {
                foreach ($xpath->query($query) as $node) {
                    /** @var DOMElement $node */
                    $val = $node->getAttribute($attr);
                    $node->setAttribute($attr, $this->absolutizeUrl($val, $baseUrl));
                }
            }

            // srcset holds several comma-separated candidates and needs its own parser.
            foreach ($xpath->query('//*[@srcset]') as $node) {
                /** @var DOMElement $node */
                $node->setAttribute('srcset', $this->rewriteSrcset($node->getAttribute('srcset'), $baseUrl));
            }

            // 2) CSS inside <style> blocks.
            foreach ($xpath->query('//style') as $styleNode) {
                /** @var DOMElement $styleNode */
                $css = $styleNode->nodeValue;
                $styleNode->nodeValue = $this->rewriteCssUrls($css, $baseUrl);
            }

            // 3) CSS inside style="" attributes.
            foreach ($xpath->query('//*[@style]') as $node) {
                /** @var DOMElement $node */
                $css = $node->getAttribute('style');
                $node->setAttribute('style', $this->rewriteCssUrls($css, $baseUrl));
            }

            // Extract the original fragment back out of the wrapper.
            $wrap = $dom->getElementById('__wrap__');
            if ($wrap === null) {
                // The wrapper could not be found after parsing: return the input
                // untouched rather than an empty or truncated result.
                return $html;
            }

            $out = '';
            foreach ($wrap->childNodes as $child) {
                $out .= $dom->saveHTML($child);
            }

            return $out;
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousErrorMode);
        }
    }

    /**
     * Convert $url to an absolute URL based on $baseUrl.
     *
     * Returned unchanged:
     *  - empty strings and anchors (#...)
     *  - anything containing a [PLACEHOLDER]
     *  - URLs that already carry a scheme (http:, https:, mailto:, tel:, data:, javascript:, ...)
     *  - everything, when $baseUrl has no scheme or host
     *
     * Protocol-relative URLs (//example.com/x) keep their host and receive the
     * scheme of $baseUrl.
     *
     * @param string $url     URL as found in the markup.
     * @param string $baseUrl Absolute base URL.
     */
    public function absolutizeUrl(string $url, string $baseUrl): string
    {
        $url = trim($url);
        if ($url === '' || $url[0] === '#') {
            return $url;
        }
        if (preg_match('/\[[^\]]+\]/', $url)) {
            return $url;
        }

        // Already has a scheme (http:, https:, mailto:, data:, etc.).
        if (preg_match('#^[a-z][a-z0-9+.-]*:#i', $url)) {
            return $url;
        }

        $base = parse_url($baseUrl);
        if (!$base || empty($base['scheme']) || empty($base['host'])) {
            // If the base is invalid, bail out rather than corrupt URLs.
            return $url;
        }

        // Protocol-relative: only the scheme is missing.
        if (str_starts_with($url, '//')) {
            return $base['scheme'] . ':' . $url;
        }

        $basePath = $base['path'] ?? '/';
        // If the base URL points to a file, resolve against its directory.
        if (!str_ends_with($basePath, '/')) {
            $basePath = preg_replace('#/[^/]*$#', '/', $basePath);
        }

        $path = str_starts_with($url, '/') ? $url : $basePath . $url;
        $path = $this->normalizePath($path);

        $port = isset($base['port']) ? ':' . $base['port'] : '';

        return $base['scheme'] . '://' . $base['host'] . $port . $path;
    }

    /**
     * Resolve "." and ".." segments and collapse duplicate slashes in a path.
     *
     * A query string and fragment, if present, are carried over untouched.
     * A trailing slash (or a trailing "." / ".." segment, which denotes a
     * directory) is preserved so that "/blog/" does not become "/blog".
     *
     * @param string $path Absolute path, optionally followed by ?query and #fragment.
     *
     * @return string Normalized path, always starting with "/".
     */
    public function normalizePath(string $path): string
    {
        $parts = parse_url($path);
        $parts = is_array($parts) ? $parts : [];
        $p = $parts['path'] ?? $path;

        $segments = explode('/', $p);
        $lastSegment = $segments[count($segments) - 1];
        $endsInDirectory = $lastSegment === '' || $lastSegment === '.' || $lastSegment === '..';

        $out = [];
        foreach ($segments as $seg) {
            if ($seg === '' || $seg === '.') {
                continue;
            }
            if ($seg === '..') {
                array_pop($out);
                continue;
            }
            $out[] = $seg;
        }

        $norm = '/' . implode('/', $out);
        if ($endsInDirectory && $out !== []) {
            $norm .= '/';
        }

        if (isset($parts['query'])) {
            $norm .= '?' . $parts['query'];
        }
        if (isset($parts['fragment'])) {
            $norm .= '#' . $parts['fragment'];
        }

        return $norm;
    }

    /**
     * Absolutize every URL of a srcset value such as "a.jpg 1x, /b.jpg 2x".
     *
     * Descriptors ("1x", "480w", ...) are kept exactly as written.
     */
    public function rewriteSrcset(string $srcset, string $baseUrl): string
    {
        $candidates = array_map('trim', explode(',', $srcset));
        // Index by key instead of by reference so no dangling reference is left behind.
        foreach ($candidates as $index => $cand) {
            if ($cand === '') {
                continue;
            }
            // Split at the first whitespace: "url descriptor...".
            if (preg_match('/^(\S+)(\s+.*)?$/', $cand, $m)) {
                $descriptor = $m[2] ?? '';
                $candidates[$index] = $this->absolutizeUrl($m[1], $baseUrl) . $descriptor;
            }
        }

        return implode(', ', $candidates);
    }

    /**
     * Absolutize url(...) references and @import targets inside a CSS string.
     *
     * If PCRE fails on the input, the CSS is returned as it was before the
     * failing pass instead of null.
     */
    public function rewriteCssUrls(string $css, string $baseUrl): string
    {
        // url(...) handling (supports quotes or no quotes).
        $css = preg_replace_callback(
            '#url\(\s*(["\']?)(.*?)\1\s*\)#i',
            function (array $m) use ($baseUrl): string {
                $quote = $m[1];

                return 'url(' . $quote . $this->absolutizeUrl($m[2], $baseUrl) . $quote . ')';
            },
            $css
        ) ?? $css;

        // @import "..."; or @import url("..."); etc.
        return preg_replace_callback(
            '#@import\s+(?:url\()?(\s*["\']?)([^"\')\s;]+)\1\)?#i',
            function (array $m) use ($baseUrl): string {
                $quote = trim($m[1]);
                $abs = $this->absolutizeUrl($m[2], $baseUrl);
                // Detect the url(...) form the same way the pattern matched it
                // (any case, any amount of whitespace); otherwise an unquoted
                // url() import would be emitted as a bare, invalid "@import <url>".
                $usesUrlFunction = preg_match('#^@import\s+url\(#i', $m[0]) === 1;

                return $usesUrlFunction
                    ? '@import url(' . $quote . $abs . $quote . ')'
                    : '@import ' . $quote . $abs . $quote;
            },
            $css
        ) ?? $css;
    }
}
Loading
Loading