diff --git a/src/main/java/sirius/kernel/commons/StringCleanup.java b/src/main/java/sirius/kernel/commons/StringCleanup.java index d5639246..89899ace 100644 --- a/src/main/java/sirius/kernel/commons/StringCleanup.java +++ b/src/main/java/sirius/kernel/commons/StringCleanup.java @@ -559,10 +559,22 @@ public static String htmlToPlainText(@Nonnull String input) { // Replace p tags with line breaks normalizedText = PATTERN_PP_TAG.matcher(normalizedText).replaceAll("\n"); normalizedText = PATTERN_P_TAG.matcher(normalizedText).replaceAll("\n"); - // Remove any other tags - normalizedText = Strings.cleanup(normalizedText, StringCleanup::removeXml); - // Decode entities - normalizedText = Strings.cleanup(normalizedText, StringCleanup::decodeHtmlEntities); + + // Iterates the lines to clean them up properly, preserving the line breaks converted above, + // as the RegEx used by removeXml would detect and clean them. + StringBuilder builder = new StringBuilder(); + normalizedText.lines().forEach(lineText -> { + if (!builder.isEmpty()) { + builder.append("\n"); + } + + // Remove any other tags + String normalizedLine = Strings.cleanup(lineText, StringCleanup::removeXml); + // Decode entities + normalizedLine = Strings.cleanup(normalizedLine, StringCleanup::decodeHtmlEntities); + builder.append(normalizedLine); + }); + return builder.toString(); } return normalizedText; diff --git a/src/main/java/sirius/kernel/commons/Strings.java b/src/main/java/sirius/kernel/commons/Strings.java index e0908d3a..1960fdf0 100644 --- a/src/main/java/sirius/kernel/commons/Strings.java +++ b/src/main/java/sirius/kernel/commons/Strings.java @@ -304,10 +304,10 @@ public static boolean isHttpsUrl(@Nullable String value) { } /** - * Returns an url encoded representation of the given value with UTF-8 as character encoding. + * Returns a url encoded representation of the given value with UTF-8 as character encoding. * * @param value the value to be encoded. 
- * @return an url encoded representation of value, using UTF-8 as character encoding. + * @return a url encoded representation of value, using UTF-8 as character encoding. * @deprecated use {@link Urls#encode(String)} instead. */ @Nullable @@ -317,10 +317,10 @@ public static String urlEncode(@Nullable String value) { } /** - * Returns an url decoded representation of the given value with UTF-8 as character encoding. + * Returns a url decoded representation of the given value with UTF-8 as character encoding. * * @param value the value to be decoded. - * @return an url decoded representation of value, using UTF-8 as character encoding. + * @return a url decoded representation of value, using UTF-8 as character encoding. * @deprecated use {@link Urls#decode(String)} instead. */ @Nullable @@ -497,7 +497,7 @@ public static String truncateMiddle(@Nullable Object input, /** * Returns a string representation of the given map. *
- * Keys and values are separated by a colon (:) and entries by a new line. + * Keys and values are separated by a colon {@code :} and entries by a new line. * * @param source to map to be converted to a string * @return a string representation of the given map, or "" if the map was null @@ -631,7 +631,7 @@ public static String trim(Object object) { *
* Note that empty/null inputs will always result in an empty string. * - * @param inputString the string to clean-up + * @param inputString the string to clean up * @param cleanups the operations to perform, most probably some from {@link StringCleanup} * @return the cleaned up string * @see StringCleanup @@ -656,7 +656,7 @@ public static String cleanup(@Nullable String inputString, @Nonnull UnaryOperato *
* Note that empty/null inputs will always result in an empty string. * - * @param inputString the string to clean-up + * @param inputString the string to clean up * @param cleanups the operations to perform, most probably some from {@link StringCleanup} * @return the cleaned up string * @see StringCleanup @@ -751,7 +751,9 @@ public static String shorten(String string, int numChars) { * the replacement function. *
* To replace all occurrences of {@code #{X}} by {@code NLS.get("X")} one could use:
- * {@code Strings.replaceAll(Pattern.compile("#\\{([^\\}]+)\\}"), someText, NLS::get)}
+ *
+ * Strings.replaceAll(Pattern.compile("#\\{([^\\}]+)\\}"), someText, NLS::get)
+ *
*
* @param regEx the regular expression to replace in the given input
* @param input the input to scan
diff --git a/src/test/kotlin/sirius/kernel/commons/StringsTest.kt b/src/test/kotlin/sirius/kernel/commons/StringsTest.kt
index 8341f136..068de73b 100644
--- a/src/test/kotlin/sirius/kernel/commons/StringsTest.kt
+++ b/src/test/kotlin/sirius/kernel/commons/StringsTest.kt
@@ -425,5 +425,12 @@ class StringsTest {
)
)
+ assertEquals(
+ "bold\nmove",
+ Strings.cleanup(
+ "<b>bold</b><br>move",
+ StringCleanup::htmlToPlainText
+ )
+ )
}
}