From ec04a29aa5e19ce508a3c9bcf81079198749f952 Mon Sep 17 00:00:00 2001 From: Idevaldo De Lira Date: Fri, 16 Jan 2026 10:42:26 +0100 Subject: [PATCH 1/7] Adds test for HTML to plaintext conversion with line breaks including combined extra tags Fixes: OX-12298 --- src/test/kotlin/sirius/kernel/commons/StringsTest.kt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/test/kotlin/sirius/kernel/commons/StringsTest.kt b/src/test/kotlin/sirius/kernel/commons/StringsTest.kt index 8341f136..068de73b 100644 --- a/src/test/kotlin/sirius/kernel/commons/StringsTest.kt +++ b/src/test/kotlin/sirius/kernel/commons/StringsTest.kt @@ -425,5 +425,12 @@ class StringsTest { ) ) + assertEquals( + "bold\nmove", + Strings.cleanup( + "bold
move", + StringCleanup::htmlToPlainText + ) + ) } } From 15b65d5c44106863d3b4a56cf8fb89df665446ac Mon Sep 17 00:00:00 2001 From: Idevaldo De Lira Date: Fri, 16 Jan 2026 10:43:44 +0100 Subject: [PATCH 2/7] Adds a helper method to iterate lines of a string Fixes: OX-12298 --- .../java/sirius/kernel/commons/Strings.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/main/java/sirius/kernel/commons/Strings.java b/src/main/java/sirius/kernel/commons/Strings.java index e0908d3a..48016481 100644 --- a/src/main/java/sirius/kernel/commons/Strings.java +++ b/src/main/java/sirius/kernel/commons/Strings.java @@ -18,6 +18,8 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Scanner; +import java.util.function.BiConsumer; import java.util.function.UnaryOperator; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -861,4 +863,21 @@ private static String performPadding(@Nullable String input, @Nonnull String pad return sb.toString(); } + + /** + * Iterates over all lines of the given text and applies the given consumer for each line. + *

+ * This method works independent on the end-of-line characters used in the given text. + * + * @param text the text to iterate over + * @param lineConsumer the consumer to be applied for each line. The first parameter is the line number (starting at 1), + */ + public static void iterateLines(String text, BiConsumer lineConsumer) { + try (Scanner scanner = new Scanner(text)) { + int lineNumber = 0; + while (scanner.hasNextLine()) { + lineConsumer.accept(++lineNumber, scanner.nextLine()); + } + } + } } From 53d2f588167e894955698c19911366deb749d9c5 Mon Sep 17 00:00:00 2001 From: Idevaldo De Lira Date: Fri, 16 Jan 2026 10:43:56 +0100 Subject: [PATCH 3/7] Fixes typos and JavaDoc formatting Fixes: OX-12298 --- .../java/sirius/kernel/commons/Strings.java | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/main/java/sirius/kernel/commons/Strings.java b/src/main/java/sirius/kernel/commons/Strings.java index 48016481..dc883f26 100644 --- a/src/main/java/sirius/kernel/commons/Strings.java +++ b/src/main/java/sirius/kernel/commons/Strings.java @@ -306,10 +306,10 @@ public static boolean isHttpsUrl(@Nullable String value) { } /** - * Returns an url encoded representation of the given value with UTF-8 as character encoding. + * Returns a url encoded representation of the given value with UTF-8 as character encoding. * * @param value the value to be encoded. - * @return an url encoded representation of value, using UTF-8 as character encoding. + * @return a url encoded representation of value, using UTF-8 as character encoding. * @deprecated use {@link Urls#encode(String)} instead. */ @Nullable @@ -319,10 +319,10 @@ public static String urlEncode(@Nullable String value) { } /** - * Returns an url decoded representation of the given value with UTF-8 as character encoding. + * Returns a url decoded representation of the given value with UTF-8 as character encoding. * * @param value the value to be decoded. 
- * @return an url decoded representation of value, using UTF-8 as character encoding. + * @return a url decoded representation of value, using UTF-8 as character encoding. * @deprecated use {@link Urls#decode(String)} instead. */ @Nullable @@ -499,7 +499,7 @@ public static String truncateMiddle(@Nullable Object input, /** * Returns a string representation of the given map. *

- * Keys and values are separated by a colon (:) and entries by a new line. + * Keys and values are separated by a colon {@code :} and entries by a new line. * * @param source to map to be converted to a string * @return a string representation of the given map, or "" if the map was null @@ -633,7 +633,7 @@ public static String trim(Object object) { *

* Note that empty/null inputs will always result in an empty string. * - * @param inputString the string to clean-up + * @param inputString the string to clean up * @param cleanups the operations to perform, most probably some from {@link StringCleanup} * @return the cleaned up string * @see StringCleanup @@ -658,7 +658,7 @@ public static String cleanup(@Nullable String inputString, @Nonnull UnaryOperato *

* Note that empty/null inputs will always result in an empty string. * - * @param inputString the string to clean-up + * @param inputString the string to clean up * @param cleanups the operations to perform, most probably some from {@link StringCleanup} * @return the cleaned up string * @see StringCleanup @@ -753,7 +753,9 @@ public static String shorten(String string, int numChars) { * the replacement function. *

* To replace all occurrences of {@code #{X}} by {@code NLS.get("X")} one could use: - * {@code Strings.replaceAll(Pattern.compile("#\\{([^\\}]+)\\}"), someText, NLS::get)} + * + * Strings.replaceAll(Pattern.compile("#\\{([^\\}]+)\\}"), someText, NLS::get) + * + * * @param regEx the regular expression to replace in the given input * @param input the input to scan From 2e7ea54e549bb9723d5e3fc441e682c3ed878fd9 Mon Sep 17 00:00:00 2001 From: Idevaldo De Lira Date: Fri, 16 Jan 2026 10:45:33 +0100 Subject: [PATCH 4/7] Convert HTML to plain text line by line preserving the line breaks converted previously Fixes: OX-12298 --- .../sirius/kernel/commons/StringCleanup.java | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/main/java/sirius/kernel/commons/StringCleanup.java b/src/main/java/sirius/kernel/commons/StringCleanup.java index d5639246..f983567a 100644 --- a/src/main/java/sirius/kernel/commons/StringCleanup.java +++ b/src/main/java/sirius/kernel/commons/StringCleanup.java @@ -559,10 +559,22 @@ public static String htmlToPlainText(@Nonnull String input) { // Replace p tags with line breaks normalizedText = PATTERN_PP_TAG.matcher(normalizedText).replaceAll("\n"); normalizedText = PATTERN_P_TAG.matcher(normalizedText).replaceAll("\n"); - // Remove any other tags - normalizedText = Strings.cleanup(normalizedText, StringCleanup::removeXml); - // Decode entities - normalizedText = Strings.cleanup(normalizedText, StringCleanup::decodeHtmlEntities); + + // Iterates the lines to clean them up properly, preserving the line breaks converted above, + // as the RegEx used by removeXml would detect and clean them. 
+ StringBuilder builder = new StringBuilder(); + Strings.iterateLines(normalizedText, (lineNumber, lineText) -> { + if (lineNumber > 1) { + builder.append("\n"); + } + + // Remove any other tags + String normalizedLine = Strings.cleanup(lineText, StringCleanup::removeXml); + // Decode entities + normalizedLine = Strings.cleanup(normalizedLine, StringCleanup::decodeHtmlEntities); + builder.append(normalizedLine); + }); + return builder.toString(); } return normalizedText; From 006fb1d4e69b6cd8f5faf4b2d81c75bfa875a1c9 Mon Sep 17 00:00:00 2001 From: Idevaldo De Lira Date: Fri, 16 Jan 2026 10:54:28 +0100 Subject: [PATCH 5/7] Improves JavaDoc Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/main/java/sirius/kernel/commons/Strings.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/sirius/kernel/commons/Strings.java b/src/main/java/sirius/kernel/commons/Strings.java index dc883f26..33a6bf29 100644 --- a/src/main/java/sirius/kernel/commons/Strings.java +++ b/src/main/java/sirius/kernel/commons/Strings.java @@ -872,7 +872,7 @@ private static String performPadding(@Nullable String input, @Nonnull String pad * This method works independent on the end-of-line characters used in the given text. * * @param text the text to iterate over - * @param lineConsumer the consumer to be applied for each line. The first parameter is the line number (starting at 1), + * @param lineConsumer the consumer to be applied for each line. The first parameter is the line number (starting at 1), the second is the corresponding line text. 
*/ public static void iterateLines(String text, BiConsumer lineConsumer) { try (Scanner scanner = new Scanner(text)) { From 31cacac9ecec3ec2ed7a55ad6cff9b94649a62c8 Mon Sep 17 00:00:00 2001 From: Idevaldo De Lira Date: Fri, 16 Jan 2026 11:14:42 +0100 Subject: [PATCH 6/7] Uses String.lines instead of Scanner Fixes: OX-12298 --- src/main/java/sirius/kernel/commons/Strings.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/main/java/sirius/kernel/commons/Strings.java b/src/main/java/sirius/kernel/commons/Strings.java index 33a6bf29..385dc712 100644 --- a/src/main/java/sirius/kernel/commons/Strings.java +++ b/src/main/java/sirius/kernel/commons/Strings.java @@ -18,7 +18,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.Scanner; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiConsumer; import java.util.function.UnaryOperator; import java.util.regex.Matcher; @@ -875,11 +875,12 @@ private static String performPadding(@Nullable String input, @Nonnull String pad * @param lineConsumer the consumer to be applied for each line. The first parameter is the line number (starting at 1), the second is the corresponding line text. 
*/ public static void iterateLines(String text, BiConsumer lineConsumer) { - try (Scanner scanner = new Scanner(text)) { - int lineNumber = 0; - while (scanner.hasNextLine()) { - lineConsumer.accept(++lineNumber, scanner.nextLine()); - } + if (isEmpty(text)) { + return; } + AtomicInteger lineNumber = new AtomicInteger(0); + text.lines().forEach(line -> { + lineConsumer.accept(lineNumber.incrementAndGet(), line); + }); } } From 0b420f544b509962bbab215396a1f3e452b095c6 Mon Sep 17 00:00:00 2001 From: Idevaldo De Lira Date: Fri, 16 Jan 2026 11:33:39 +0100 Subject: [PATCH 7/7] Drops method in favor of a vanilla functionality Fixes: OX-12298 --- .../sirius/kernel/commons/StringCleanup.java | 4 ++-- .../java/sirius/kernel/commons/Strings.java | 20 ------------------- 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/src/main/java/sirius/kernel/commons/StringCleanup.java b/src/main/java/sirius/kernel/commons/StringCleanup.java index f983567a..89899ace 100644 --- a/src/main/java/sirius/kernel/commons/StringCleanup.java +++ b/src/main/java/sirius/kernel/commons/StringCleanup.java @@ -563,8 +563,8 @@ public static String htmlToPlainText(@Nonnull String input) { // Iterates the lines to clean them up properly, preserving the line breaks converted above, // as the RegEx used by removeXml would detect and clean them. 
StringBuilder builder = new StringBuilder(); - Strings.iterateLines(normalizedText, (lineNumber, lineText) -> { - if (lineNumber > 1) { + normalizedText.lines().forEach(lineText -> { + if (!builder.isEmpty()) { builder.append("\n"); } diff --git a/src/main/java/sirius/kernel/commons/Strings.java b/src/main/java/sirius/kernel/commons/Strings.java index 385dc712..1960fdf0 100644 --- a/src/main/java/sirius/kernel/commons/Strings.java +++ b/src/main/java/sirius/kernel/commons/Strings.java @@ -18,8 +18,6 @@ import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.function.BiConsumer; import java.util.function.UnaryOperator; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -865,22 +863,4 @@ private static String performPadding(@Nullable String input, @Nonnull String pad return sb.toString(); } - - /** - * Iterates over all lines of the given text and applies the given consumer for each line. - *

- * This method works independent on the end-of-line characters used in the given text. - * - * @param text the text to iterate over - * @param lineConsumer the consumer to be applied for each line. The first parameter is the line number (starting at 1), the second is the corresponding line text. - */ - public static void iterateLines(String text, BiConsumer lineConsumer) { - if (isEmpty(text)) { - return; - } - AtomicInteger lineNumber = new AtomicInteger(0); - text.lines().forEach(line -> { - lineConsumer.accept(lineNumber.incrementAndGet(), line); - }); - } }