diff --git a/owasp-java-html-sanitizer/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java b/owasp-java-html-sanitizer/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java
index 0af4f51f..5a3b5fe9 100644
--- a/owasp-java-html-sanitizer/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java
+++ b/owasp-java-html-sanitizer/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java
@@ -94,8 +94,170 @@ public void closeDocument() {
public void text(String textChunk) {
if (!skipText) {
- out.text(textChunk);
+ // Markup found inside a CDATA element (style/script) is reclassified by the
+ // lexer as UNESCAPED text, so it reaches this method instead of the tag
+ // callbacks and has to be validated here. Only style and script are CDATA
+ // elements; noscript/noembed/noframes are PCDATA.
+ boolean inCdataTextContainer = false;
+ // The stack is walked from the top in steps of two.
+ // NOTE(review): presumably entries are stored in pairs with the adjusted
+ // name last - confirm against the code that pushes onto openElementStack.
+ int idx = openElementStack.size() - 1;
+ while (idx >= 0 && !inCdataTextContainer) {
+   String adjustedName = openElementStack.get(idx);
+   inCdataTextContainer = adjustedName != null
+       && allowedTextContainers.contains(adjustedName)
+       && ("style".equals(adjustedName) || "script".equals(adjustedName));
+   idx -= 2;
+ }
+
+ // A chunk without '<' cannot contain a tag and is passed through untouched.
+ boolean mayContainTag = textChunk != null && textChunk.indexOf('<') >= 0;
+ if (inCdataTextContainer && mayContainTag) {
+   // Drop tags (and the content of elements) that the policy does not allow.
+   out.text(stripDisallowedTags(textChunk));
+ } else {
+   out.text(textChunk);
+ }
+ }
+ }
+
+ /**
+  * Removes markup for elements the policy does not allow from raw text.
+  * Used for text inside style/script containers, where tags arrive as plain
+  * text rather than as tag events and would otherwise be emitted unchecked.
+  *
+  * <p>The text is scanned for {@code <...>} spans, which are handled as follows:
+  * <ul>
+  * <li>a {@code <} with no later {@code >} is dropped (only that one character);
+  * <li>a span whose content, after trimming and an optional leading {@code /},
+  *     does not start with a letter (e.g. {@code <3>}) is copied verbatim;
+  * <li>an opening or closing tag whose canonical name is a key of
+  *     {@code elAndAttrPolicies} is copied verbatim;
+  * <li>a closing tag for any other element is dropped;
+  * <li>an opening tag for any other element is dropped together with everything
+  *     up to and including its matching closing tag, or up to the end of the
+  *     text when there is none.
+  * </ul>
+  * The scan is purely textual: the first {@code >} after a {@code <} ends the
+  * span, even if it sits inside a quoted attribute value.
+  *
+  * @param text raw text to filter; may be {@code null}
+  * @return the filtered text, or {@code null} if {@code text} was {@code null}
+  */
+ private String stripDisallowedTags(String text) {
+   if (text == null) {
+     return text;
+   }
+
+   int len = text.length();
+   StringBuilder result = new StringBuilder(len);
+   int i = 0;
+
+   while (i < len) {
+     int tagStart = text.indexOf('<', i);
+     if (tagStart < 0) {
+       // No more tags: keep the remainder as-is.
+       result.append(text, i, len);
+       break;
+     }
+
+     // Keep the plain text that precedes the '<'.
+     result.append(text, i, tagStart);
+
+     int tagEnd = text.indexOf('>', tagStart + 1);
+     if (tagEnd < 0) {
+       // Unterminated '<': drop just that character and keep scanning.
+       i = tagStart + 1;
+       continue;
+     }
+
+     String tagContent = text.substring(tagStart + 1, tagEnd);
+     // Leading whitespace is tolerated so that "< script>" is still recognized.
+     String trimmedTagContent = tagContent.trim();
+     // Opening vs. closing is decided on the trimmed content, the same string
+     // the tag name is taken from. Testing the untrimmed content would make
+     // "< /x>" look like an opening tag and, for a disallowed x, swallow
+     // everything after it.
+     boolean isClosingTag = trimmedTagContent.startsWith("/");
+     String nameAndAttrs = isClosingTag ? trimmedTagContent.substring(1) : trimmedTagContent;
+
+     // From here on the whole "<...>" span is consumed one way or another.
+     i = tagEnd + 1;
+
+     if (nameAndAttrs.isEmpty() || !Character.isLetter(nameAndAttrs.charAt(0))) {
+       // Not an element tag (e.g. "<3>", "<>", "</ x>"): keep it verbatim.
+       result.append(text, tagStart, tagEnd + 1);
+       continue;
+     }
+
+     // The name is everything up to the first whitespace character.
+     String tagName = HtmlLexer.canonicalElementName(nameAndAttrs.split("\\s")[0]);
+
+     if (elAndAttrPolicies.containsKey(tagName)) {
+       // Allowed element: keep the tag exactly as written.
+       result.append(text, tagStart, tagEnd + 1);
+     } else if (!isClosingTag) {
+       // Disallowed opening tag: drop it together with the element's content.
+       i = skipDisallowedElement(text, i, tagName);
+     }
+     // A disallowed closing tag is simply dropped.
    }
+
+   return result.toString();
  }
+
+ /**
+  * Finds the end of a disallowed element whose opening tag has already been
+  * consumed. Nested opening tags with the same canonical name are counted so
+  * that the closing tag belonging to the outermost one is the one matched.
+  *
+  * @param text the text being filtered
+  * @param from index just past the disallowed opening tag
+  * @param tagName canonical name of the disallowed element
+  * @return the index just past the matching closing tag, or
+  *     {@code text.length()} when no matching closing tag exists or an
+  *     unterminated {@code <} is reached first
+  */
+ private static int skipDisallowedElement(String text, int from, String tagName) {
+   int len = text.length();
+   int nestingLevel = 1;
+   int pos = from;
+   while (pos < len) {
+     int nextTagStart = text.indexOf('<', pos);
+     if (nextTagStart < 0) {
+       // No more tags: everything up to the end belongs to the element.
+       return len;
+     }
+     int nextTagEnd = text.indexOf('>', nextTagStart + 1);
+     if (nextTagEnd < 0) {
+       // Unterminated '<': skip to the end.
+       return len;
+     }
+
+     String trimmedNextTagContent = text.substring(nextTagStart + 1, nextTagEnd).trim();
+     String firstToken = trimmedNextTagContent.split("\\s")[0];
+     boolean closing = trimmedNextTagContent.startsWith("/");
+     // For a closing tag the token starts with '/', which is not part of the name.
+     String nextTagName =
+         HtmlLexer.canonicalElementName(closing ? firstToken.substring(1) : firstToken);
+
+     if (nextTagName.equals(tagName)) {
+       nestingLevel += closing ? -1 : 1;
+       if (nestingLevel == 0) {
+         // Matching closing tag found: resume right after it.
+         return nextTagEnd + 1;
+       }
+     }
+     pos = nextTagEnd + 1;
+   }
+   return pos;
+ }
public void openTag(String elementName, List attrs) {
diff --git a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java
index 2ebf55ea..4065eb93 100644
--- a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java
+++ b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java
@@ -46,14 +46,19 @@ public class HtmlLexerTest extends TestCase {
public final void testHtmlLexer() throws Exception {
// Do the lexing.
String input = new String(Files.readAllBytes(Paths.get(getClass().getResource("htmllexerinput1.html").toURI())), StandardCharsets.UTF_8);
+ // Convert CRLF and lone CR to LF before lexing so the input is identical on Windows and Unix.
+ input = input.replace("\r\n", "\n").replace("\r", "\n");
StringBuilder actual = new StringBuilder();
lex(input, actual);
// Get the golden.
String golden = new String(Files.readAllBytes(Paths.get(getClass().getResource("htmllexergolden1.txt").toURI())), StandardCharsets.UTF_8);
+ // Apply the same CRLF/CR -> LF conversion to the golden text and to the lexer output so only content is compared.
+ golden = golden.replace("\r\n", "\n").replace("\r", "\n");
+ String actualStr = actual.toString().replace("\r\n", "\n").replace("\r", "\n");
// Compare.
- assertEquals(golden, actual.toString());
+ assertEquals(golden, actualStr);
}
@Test
diff --git a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerFuzzerTest.java b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerFuzzerTest.java
index 633ed91d..17d4c15a 100644
--- a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerFuzzerTest.java
+++ b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerFuzzerTest.java
@@ -162,4 +162,4 @@ public void run() {
}
}
-}
+}
\ No newline at end of file
diff --git a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerTest.java b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerTest.java
index 1ff169df..003e52c8 100644
--- a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerTest.java
+++ b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerTest.java
@@ -454,6 +454,158 @@ public static final void testStylingCornerCase() {
assertEquals(want, sanitize(input));
}
+ /**
+ * These 5 tests cover regression scenarios for CVE-2025-66021, which relates to
+ * improper sanitization of HTML content involving ";
+ String expectedPayload = "";
+
+ HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder();
+ PolicyFactory policy = htmlPolicyBuilder
+ .allowElements("style", "noscript")
+ .allowTextIn("style")
+ .toFactory();
+
+ // Act
+ String sanitized = policy.sanitize(actualPayload);
+
+ // Assert
+ assertEquals(expectedPayload, sanitized);
+ }
+
+ /**
+ * Test #2:
+ * Ensure that ";
+ String expectedPayload = "";
+
+ HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder();
+ PolicyFactory policy = htmlPolicyBuilder
+ .allowElements("style", "noscript")
+ .allowTextIn("style")
+ .toFactory();
+
+ // Act
+ String sanitized = policy.sanitize(actualPayload);
+
+ // Assert
+ assertEquals(expectedPayload, sanitized);
+ }
+
+ /**
+ * Test #3:
+ * Ensure that, if
is allowed, then
injected inside ";
+ String expectedPayload = "";
+
+ HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder();
+ PolicyFactory policy = htmlPolicyBuilder
+ .allowElements("style", "noscript", "div")
+ .allowTextIn("style")
+ .toFactory();
+
+ // Act
+ String sanitized = policy.sanitize(actualPayload);
+
+ // Assert
+ assertEquals(expectedPayload, sanitized);
+ }
+
+ /**
+ * Test #4:
+ * Confirm that an attempt to prematurely close ";
+
+ HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder();
+ PolicyFactory policy = htmlPolicyBuilder
+ .allowElements("style", "noscript", "p")
+ .allowTextIn("style")
+ .toFactory();
+
+ // Act
+ String sanitized = policy.sanitize(actualPayload);
+
+ // Assert
+ assertEquals(expectedPayload, sanitized);
+ }
+
+ /**
+ * Test #5:
+ * Like Test #4, but with