diff --git a/owasp-java-html-sanitizer/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java b/owasp-java-html-sanitizer/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java index 0af4f51f..5a3b5fe9 100644 --- a/owasp-java-html-sanitizer/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java +++ b/owasp-java-html-sanitizer/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java @@ -94,8 +94,170 @@ public void closeDocument() { public void text(String textChunk) { if (!skipText) { - out.text(textChunk); + // Check if we're inside a CDATA element (style/script) with allowTextIn + // where tags are reclassified as UNESCAPED text and need to be validated + // Note: Only style and script are CDATA elements; noscript/noembed/noframes are PCDATA + boolean insideCdataElement = false; + for (int i = openElementStack.size() - 1; i >= 0; i -= 2) { + String adjustedName = openElementStack.get(i); + if (adjustedName != null + && allowedTextContainers.contains(adjustedName) + && ("style".equals(adjustedName) || "script".equals(adjustedName))) { + insideCdataElement = true; + break; + } + } + + // If inside a CDATA element (style/script) with allowTextIn, we need to filter out + // HTML tags that aren't allowed because tags inside these blocks are reclassified + // as UNESCAPED text by the lexer + if (insideCdataElement && textChunk != null && textChunk.indexOf('<') >= 0) { + // Strip out HTML tags that aren't in the allowed elements list + String filtered = stripDisallowedTags(textChunk); + out.text(filtered); + } else { + out.text(textChunk); + } + } + } + + /** + * Strips out HTML tags that aren't in the allowed elements list from text content. + * This is used when tags appear inside text containers (like style blocks) where + * they're treated as text but should still be validated. + */ + private String stripDisallowedTags(String text) { + if (text == null) { + return text; + } + + StringBuilder result = new StringBuilder(); + int len = text.length(); + int i = 0; + + while (i < len) { + int tagStart = text.indexOf('<', i); + if (tagStart < 0) { + // No more tags, append the rest + result.append(text.substring(i)); + break; + } + + // Append text before the tag + if (tagStart > i) { + result.append(text.substring(i, tagStart)); + } + + // Find the end of the tag (either '>' or end of string) + int tagEnd = text.indexOf('>', tagStart + 1); + if (tagEnd < 0) { + // Unclosed tag, skip it + i = tagStart + 1; + continue; + } + + // Extract the tag content (between < and >) + String tagContent = text.substring(tagStart + 1, tagEnd); + + // Only process if this looks like a valid HTML element tag + // Valid tags start with a letter or / followed by a letter + // Skip things like <, , <3, etc. + // Also handle tags with leading whitespace like < script> + boolean isValidTag = false; + String tagName = null; + + // Trim leading whitespace for tag name detection + String trimmedTagContent = tagContent.trim(); + + if (trimmedTagContent.startsWith("/")) { + // Closing tag - must have / followed by a letter + if (trimmedTagContent.length() > 1) { + char firstChar = trimmedTagContent.charAt(1); + if (Character.isLetter(firstChar)) { + isValidTag = true; + tagName = trimmedTagContent.substring(1).trim().split("\\s")[0]; + tagName = HtmlLexer.canonicalElementName(tagName); + } + } + } else { + // Opening tag - must start with a letter (after trimming whitespace) + if (trimmedTagContent.length() > 0) { + char firstChar = trimmedTagContent.charAt(0); + if (Character.isLetter(firstChar)) { + isValidTag = true; + tagName = trimmedTagContent.split("\\s")[0]; + tagName = HtmlLexer.canonicalElementName(tagName); + } + } + } + + if (!isValidTag) { + // Not a valid HTML tag, just append it as-is + result.append('<').append(tagContent).append('>'); + i = tagEnd + 1; + continue; + } + + // Check if it's a closing tag + if (tagContent.startsWith("/")) { + // Only allow closing tags if the element is allowed + if (elAndAttrPolicies.containsKey(tagName)) { + result.append('<').append(tagContent).append('>'); + } + // Otherwise skip the closing tag + i = tagEnd + 1; + } else { + // Opening tag - only allow tags if the element is in the allowed list + if (elAndAttrPolicies.containsKey(tagName)) { + result.append('<').append(tagContent).append('>'); + i = tagEnd + 1; + } else { + // Skip disallowed tag and its content until matching closing tag + i = tagEnd + 1; + // Track nesting level to find the matching closing tag + int nestingLevel = 1; + while (i < len && nestingLevel > 0) { + int nextTagStart = text.indexOf('<', i); + if (nextTagStart < 0) { + // No more tags, skip to end + i = len; + break; + } + int nextTagEnd = text.indexOf('>', nextTagStart + 1); + if (nextTagEnd < 0) { + // Unclosed tag, skip to end + i = len; + break; + } + String nextTagContent = text.substring(nextTagStart + 1, nextTagEnd); + String trimmedNextTagContent = nextTagContent.trim(); + String nextTagName = trimmedNextTagContent.split("\\s")[0]; + if (trimmedNextTagContent.startsWith("/")) { + // Closing tag + nextTagName = nextTagName.substring(1); + nextTagName = HtmlLexer.canonicalElementName(nextTagName); + if (nextTagName.equals(tagName)) { + nestingLevel--; + if (nestingLevel == 0) { + // Found matching closing tag, skip it and continue + i = nextTagEnd + 1; + break; + } + } + } else { + // Opening tag + nextTagName = HtmlLexer.canonicalElementName(nextTagName); + if (nextTagName.equals(tagName)) { + nestingLevel++; + } + } + i = nextTagEnd + 1; + } + } + } } + + return result.toString(); } public void openTag(String elementName, List attrs) { diff --git a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java index 2ebf55ea..4065eb93 100644 --- a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java +++ b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java @@ -46,14 +46,19 @@ public class HtmlLexerTest extends TestCase { public final void testHtmlLexer() throws Exception { // Do the lexing. String input = new String(Files.readAllBytes(Paths.get(getClass().getResource("htmllexerinput1.html").toURI())), StandardCharsets.UTF_8); + // Normalize line endings in input to handle Windows/Unix differences + input = input.replace("\r\n", "\n").replace("\r", "\n"); StringBuilder actual = new StringBuilder(); lex(input, actual); // Get the golden. String golden = new String(Files.readAllBytes(Paths.get(getClass().getResource("htmllexergolden1.txt").toURI())), StandardCharsets.UTF_8); + // Normalize line endings to handle Windows/Unix differences + golden = golden.replace("\r\n", "\n").replace("\r", "\n"); + String actualStr = actual.toString().replace("\r\n", "\n").replace("\r", "\n"); // Compare. - assertEquals(golden, actual.toString()); + assertEquals(golden, actualStr); } @Test diff --git a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerFuzzerTest.java b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerFuzzerTest.java index 633ed91d..17d4c15a 100644 --- a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerFuzzerTest.java +++ b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerFuzzerTest.java @@ -162,4 +162,4 @@ public void run() { } } -} +} \ No newline at end of file diff --git a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerTest.java b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerTest.java index 1ff169df..003e52c8 100644 --- a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerTest.java +++ b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerTest.java @@ -454,6 +454,158 @@ public static final void testStylingCornerCase() { assertEquals(want, sanitize(input)); } + /** + * These 5 tests cover regression scenarios for CVE-2025-66021, which relates to + * improper sanitization of HTML content involving "; + String expectedPayload = ""; + + HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder(); + PolicyFactory policy = htmlPolicyBuilder + .allowElements("style", "noscript") + .allowTextIn("style") + .toFactory(); + + // Act + String sanitized = policy.sanitize(actualPayload); + + // Assert + assertEquals(expectedPayload, sanitized); + } + + /** + * Test #2: + * Ensure that "; + String expectedPayload = ""; + + HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder(); + PolicyFactory policy = htmlPolicyBuilder + .allowElements("style", "noscript") + .allowTextIn("style") + .toFactory(); + + // Act + String sanitized = policy.sanitize(actualPayload); + + // Assert + assertEquals(expectedPayload, sanitized); + } + + /** + * Test #3: + * Ensure that, if
is allowed, then
injected inside "; + String expectedPayload = ""; + + HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder(); + PolicyFactory policy = htmlPolicyBuilder + .allowElements("style", "noscript", "div") + .allowTextIn("style") + .toFactory(); + + // Act + String sanitized = policy.sanitize(actualPayload); + + // Assert + assertEquals(expectedPayload, sanitized); + } + + /** + * Test #4: + * Confirm that an attempt to prematurely close "; + + HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder(); + PolicyFactory policy = htmlPolicyBuilder + .allowElements("style", "noscript", "p") + .allowTextIn("style") + .toFactory(); + + // Act + String sanitized = policy.sanitize(actualPayload); + + // Assert + assertEquals(expectedPayload, sanitized); + } + + /** + * Test #5: + * Like Test #4, but with

instead of

"; + + HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder(); + PolicyFactory policy = htmlPolicyBuilder + .allowElements("style", "noscript", "p") + .allowTextIn("style") + .toFactory(); + + // Act + String sanitized = policy.sanitize(actualPayload); + + // Assert + assertEquals(expectedPayload, sanitized); + } + + /** + * Test that "; + String expectedPayload = ""; + + HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder(); + PolicyFactory policy = htmlPolicyBuilder + .allowElements("style", "noscript") + .allowTextIn("style") + .toFactory(); + + // Act + String sanitized = policy.sanitize(actualPayload); + + // Assert + assertEquals(expectedPayload, sanitized); + } + private static String sanitize(@Nullable String html) { StringBuilder sb = new StringBuilder(); HtmlStreamRenderer renderer = HtmlStreamRenderer.create(