Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,170 @@ public void closeDocument() {

/**
 * Forwards a chunk of text to {@code out} unless text is currently being skipped.
 *
 * <p>When the chunk is seen while a {@code style} or {@code script} element that is also an
 * allowed text container is open, and the chunk contains a {@code '<'}, the chunk is passed
 * through {@link #stripDisallowedTags(String)} first. Each chunk is emitted exactly once.
 *
 * @param textChunk the text to emit; a {@code null} chunk is forwarded to {@code out} unchanged
 */
public void text(String textChunk) {
  if (!skipText) {
    // Check whether a style/script element registered as a text container is open.
    // Only style and script are treated as CDATA elements here; noscript/noembed/noframes
    // are PCDATA and are not considered.
    // NOTE(review): the stack is walked from its last index in steps of two, presumably
    // because names are stored in (original, adjusted) pairs -- confirm against openTag.
    boolean insideCdataElement = false;
    for (int i = openElementStack.size() - 1; i >= 0; i -= 2) {
      String adjustedName = openElementStack.get(i);
      if (adjustedName != null
          && allowedTextContainers.contains(adjustedName)
          && ("style".equals(adjustedName) || "script".equals(adjustedName))) {
        insideCdataElement = true;
        break;
      }
    }

    // Tags that appear inside these blocks arrive here as plain text, so they have to be
    // validated against the element policy before being written out. The chunk must NOT be
    // written before this decision is made, otherwise the unfiltered text reaches the output.
    if (insideCdataElement && textChunk != null && textChunk.indexOf('<') >= 0) {
      out.text(stripDisallowedTags(textChunk));
    } else {
      out.text(textChunk);
    }
  }
}

/**
 * Removes tags for elements that have no entry in {@code elAndAttrPolicies} from text content.
 * This is used when tags appear inside text containers (like style blocks) where they are
 * treated as text but should still be validated.
 *
 * <p>Behavior, in scan order:
 * <ul>
 *   <li>Text outside {@code <...>} spans is copied unchanged.</li>
 *   <li>A {@code '<'} with no later {@code '>'} is dropped and scanning resumes right after
 *       it.</li>
 *   <li>A span whose trimmed content does not start with a letter, or with {@code '/'}
 *       followed by a letter, is not considered a tag and is copied unchanged.</li>
 *   <li>A closing tag is copied verbatim when its element is allowed and dropped otherwise.</li>
 *   <li>An opening tag is copied verbatim when its element is allowed. Otherwise the tag and
 *       everything up to and including its matching closing tag is dropped; same-named
 *       opening tags in between increase the nesting depth. If no matching closing tag is
 *       found, the rest of the text is dropped.</li>
 * </ul>
 *
 * @param text the text chunk to filter; may be {@code null}
 * @return the filtered text, or {@code null} when {@code text} is {@code null}
 */
private String stripDisallowedTags(String text) {
  if (text == null) {
    return text;
  }

  StringBuilder result = new StringBuilder();
  int len = text.length();
  int i = 0;

  while (i < len) {
    int tagStart = text.indexOf('<', i);
    if (tagStart < 0) {
      // No more tags, append the rest
      result.append(text.substring(i));
      break;
    }

    // Append text before the tag
    if (tagStart > i) {
      result.append(text.substring(i, tagStart));
    }

    // Find the end of the tag
    int tagEnd = text.indexOf('>', tagStart + 1);
    if (tagEnd < 0) {
      // Unclosed tag: drop the '<' itself and keep scanning after it
      i = tagStart + 1;
      continue;
    }

    // Extract the tag content (between < and >)
    String tagContent = text.substring(tagStart + 1, tagEnd);

    // Leading whitespace is ignored for classification so that "< script>" is still
    // recognized as a tag.
    String trimmedTagContent = tagContent.trim();

    // Decide open-vs-close once, on the trimmed content, so that classification and the
    // handling below can never disagree (e.g. for "< /div>").
    boolean isClosingTag = trimmedTagContent.startsWith("/");

    // Only process spans that look like an element tag: a letter, or '/' followed by a
    // letter. Things like "<", "</>", "<3" are left alone.
    boolean isValidTag = false;
    String tagName = null;

    if (isClosingTag) {
      // Closing tag - must have / followed by a letter
      if (trimmedTagContent.length() > 1) {
        char firstChar = trimmedTagContent.charAt(1);
        if (Character.isLetter(firstChar)) {
          isValidTag = true;
          tagName = trimmedTagContent.substring(1).trim().split("\\s")[0];
          tagName = HtmlLexer.canonicalElementName(tagName);
        }
      }
    } else {
      // Opening tag - must start with a letter (after trimming whitespace)
      if (trimmedTagContent.length() > 0) {
        char firstChar = trimmedTagContent.charAt(0);
        if (Character.isLetter(firstChar)) {
          isValidTag = true;
          tagName = trimmedTagContent.split("\\s")[0];
          tagName = HtmlLexer.canonicalElementName(tagName);
        }
      }
    }

    if (!isValidTag) {
      // Not a valid HTML tag, just append it as-is
      result.append('<').append(tagContent).append('>');
      i = tagEnd + 1;
      continue;
    }

    if (isClosingTag) {
      // Only keep closing tags of allowed elements; others are silently dropped
      if (elAndAttrPolicies.containsKey(tagName)) {
        result.append('<').append(tagContent).append('>');
      }
      i = tagEnd + 1;
    } else {
      // Opening tag - only keep it if the element is in the allowed list
      if (elAndAttrPolicies.containsKey(tagName)) {
        result.append('<').append(tagContent).append('>');
        i = tagEnd + 1;
      } else {
        // Skip the disallowed tag and its content until the matching closing tag
        i = tagEnd + 1;
        // Track nesting level of same-named tags to find the matching closing tag
        int nestingLevel = 1;
        while (i < len && nestingLevel > 0) {
          int nextTagStart = text.indexOf('<', i);
          if (nextTagStart < 0) {
            // No more tags, skip to end
            i = len;
            break;
          }
          int nextTagEnd = text.indexOf('>', nextTagStart + 1);
          if (nextTagEnd < 0) {
            // Unclosed tag, skip to end
            i = len;
            break;
          }
          String nextTagContent = text.substring(nextTagStart + 1, nextTagEnd);
          String trimmedNextTagContent = nextTagContent.trim();
          String nextTagName = trimmedNextTagContent.split("\\s")[0];
          if (trimmedNextTagContent.startsWith("/")) {
            // Closing tag: strip the leading '/' before canonicalizing the name
            nextTagName = nextTagName.substring(1);
            nextTagName = HtmlLexer.canonicalElementName(nextTagName);
            if (nextTagName.equals(tagName)) {
              nestingLevel--;
              if (nestingLevel == 0) {
                // Found matching closing tag, skip it and continue with the outer scan
                i = nextTagEnd + 1;
                break;
              }
            }
          } else {
            // Opening tag: a nested element of the same name deepens the nesting
            nextTagName = HtmlLexer.canonicalElementName(nextTagName);
            if (nextTagName.equals(tagName)) {
              nestingLevel++;
            }
          }
          i = nextTagEnd + 1;
        }
      }
    }
  }

  return result.toString();
}

public void openTag(String elementName, List<String> attrs) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,19 @@ public class HtmlLexerTest extends TestCase {
public final void testHtmlLexer() throws Exception {
  // Lexes the htmllexerinput1.html resource and compares the result with the
  // htmllexergolden1.txt golden file, ignoring differences in line-ending style.

  // Do the lexing.
  String input = new String(Files.readAllBytes(Paths.get(getClass().getResource("htmllexerinput1.html").toURI())), StandardCharsets.UTF_8);
  // Normalize line endings in input to handle Windows/Unix differences
  input = input.replace("\r\n", "\n").replace("\r", "\n");
  StringBuilder actual = new StringBuilder();
  lex(input, actual);

  // Get the golden.
  String golden = new String(Files.readAllBytes(Paths.get(getClass().getResource("htmllexergolden1.txt").toURI())), StandardCharsets.UTF_8);
  // Normalize line endings to handle Windows/Unix differences
  golden = golden.replace("\r\n", "\n").replace("\r", "\n");
  String actualStr = actual.toString().replace("\r\n", "\n").replace("\r", "\n");

  // Compare the normalized forms only; asserting on the raw lexer output as well would
  // reintroduce the line-ending sensitivity the normalization is meant to remove.
  assertEquals(golden, actualStr);
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,4 +162,4 @@ public void run() {
}
}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,158 @@ public static final void testStylingCornerCase() {
assertEquals(want, sanitize(input));
}

/**
 * The following tests cover regression scenarios for CVE-2025-66021, which relates to
 * improper sanitization of HTML content involving <style> and <noscript> tags.
 * The tests ensure that the sanitizer:
 * - properly closes any opened elements,
 * - only lets elements permitted by the policy remain inside <style> blocks,
 * - prevents injection of forbidden HTML or scripts within style or noscript,
 * - does not allow unexpected element escape or context breaking.
 */

/**
 * CVE-2025-66021 regression, case 1: a {@code <div>} placed inside {@code <style>} must be
 * dropped, together with its content, when the policy allows only {@code style} and
 * {@code noscript} elements (with text allowed in {@code style}). The CSS text must remain.
 */
@Test
public static final void testCVE202566021_1() {
  PolicyFactory styleAndNoscriptOnly = new HtmlPolicyBuilder()
      .allowElements("style", "noscript")
      .allowTextIn("style")
      .toFactory();

  // The <div> is not part of the policy, so it must not appear in the output.
  String input = "<noscript><style>/* user content */.x { font-size: 12px; }<div id=\"evil\">XSS?</div></style></noscript>";
  String want = "<noscript><style>/* user content */.x { font-size: 12px; }</style></noscript>";

  assertEquals(want, styleAndNoscriptOnly.sanitize(input));
}

/**
 * CVE-2025-66021 regression, case 2: a {@code <script>} element placed inside an allowed
 * {@code <style>} element must be removed, including its body, when the policy allows only
 * {@code style} and {@code noscript} elements (with text allowed in {@code style}).
 */
@Test
public static final void testCVE202566021_2() {
  PolicyFactory styleAndNoscriptOnly = new HtmlPolicyBuilder()
      .allowElements("style", "noscript")
      .allowTextIn("style")
      .toFactory();

  // The injected <script> block is expected to vanish; only the CSS survives.
  String input = "<noscript><style>/* user content */.x { font-size: 12px; }<script>alert('XSS Attack!')</script></style></noscript>";
  String want = "<noscript><style>/* user content */.x { font-size: 12px; }</style></noscript>";

  assertEquals(want, styleAndNoscriptOnly.sanitize(input));
}

/**
 * CVE-2025-66021 regression, case 3: once {@code div} is added to the allowed elements, a
 * {@code <div>} placed inside {@code <style>} is expected to pass through the sanitizer
 * unchanged.
 */
@Test
public static final void testCVE202566021_3() {
  PolicyFactory withDivAllowed = new HtmlPolicyBuilder()
      .allowElements("style", "noscript", "div")
      .allowTextIn("style")
      .toFactory();

  // Input and expected output are identical: nothing here is outside the policy.
  String input = "<noscript><style>/* user content */.x { font-size: 12px; }<div id=\"good\">ALLOWED?</div></style></noscript>";
  String want = "<noscript><style>/* user content */.x { font-size: 12px; }<div id=\"good\">ALLOWED?</div></style></noscript>";

  assertEquals(want, withDivAllowed.sanitize(input));
}

/**
 * CVE-2025-66021 regression, case 4: an attempt to close {@code <noscript>} from inside an
 * unterminated {@code <style>} and then append a {@code <script>} must not let the script
 * through. The expected output keeps the stray closing tag inside the style block and closes
 * both open elements.
 */
@Test
public static final void testCVE202566021_4() {
  PolicyFactory styleNoscriptAndP = new HtmlPolicyBuilder()
      .allowElements("style", "noscript", "p")
      .allowTextIn("style")
      .toFactory();

  // Break-out attempt: </noscript> appears while <style> is still open.
  String input = "<noscript><style></noscript><script>alert(1)</script>";
  String want = "<noscript><style></noscript></style></noscript>";

  assertEquals(want, styleNoscriptAndP.sanitize(input));
}

/**
 * CVE-2025-66021 regression, case 5: same break-out attempt as case 4, but the outer element
 * is {@code <p>} instead of {@code <noscript>}. The injected {@code <script>} must be removed
 * entirely and the open elements must be closed in the output.
 */
@Test
public static final void testCVE202566021_5() {
  PolicyFactory styleNoscriptAndP = new HtmlPolicyBuilder()
      .allowElements("style", "noscript", "p")
      .allowTextIn("style")
      .toFactory();

  // Break-out attempt: </p> appears while <style> is still open.
  String input = "<p><style></p><script>alert(1)</script>";
  String want = "<p><style></p></style></p>";

  assertEquals(want, styleNoscriptAndP.sanitize(input));
}

/**
 * CVE-2025-66021 regression, case 6: a script tag written with a space after the opening
 * angle bracket ({@code < script>}) inside {@code <style>} must be stripped just like a
 * regular {@code <script>} tag when only {@code style} and {@code noscript} are allowed.
 */
@Test
public static final void testCVE202566021_6() {
  PolicyFactory styleAndNoscriptOnly = new HtmlPolicyBuilder()
      .allowElements("style", "noscript")
      .allowTextIn("style")
      .toFactory();

  // Note the whitespace between '<' and the element name in the injected tag.
  String input = "<noscript><style>/* user content */.x { font-size: 12px; }< script>alert('XSS Attack!')</script></style></noscript>";
  String want = "<noscript><style>/* user content */.x { font-size: 12px; }</style></noscript>";

  assertEquals(want, styleAndNoscriptOnly.sanitize(input));
}

private static String sanitize(@Nullable String html) {
StringBuilder sb = new StringBuilder();
HtmlStreamRenderer renderer = HtmlStreamRenderer.create(
Expand Down
Loading