.
+ *
+ * @link https://www.w3.org/TR/css-syntax-3/#ident-start-code-point
+ *
+ * @param string $input The input string.
+ * @param int $offset The byte offset in the string.
+ * @return bool True if the next codepoint is an ident start code point, otherwise false.
+ */
+ final protected static function is_ident_start_codepoint( string $input, int $offset ): bool {
+ 	// Compare the numeric byte value so that every range test is a plain integer comparison.
+ 	$byte = ord( $input[ $offset ] );
+
+ 	// Any byte above 0x7F is treated as non-ASCII, which is always an ident-start code point.
+ 	if ( $byte > 0x7F ) {
+ 		return true;
+ 	}
+
+ 	// U+005F LOW LINE (_).
+ 	if ( 0x5F === $byte ) {
+ 		return true;
+ 	}
+
+ 	// ASCII letters: A-Z (0x41-0x5A) and a-z (0x61-0x7A).
+ 	$is_uppercase_letter = 0x41 <= $byte && $byte <= 0x5A;
+ 	$is_lowercase_letter = 0x61 <= $byte && $byte <= 0x7A;
+
+ 	return $is_uppercase_letter || $is_lowercase_letter;
+ }
+
+ /**
+  * Tests whether the byte at the given offset begins an "ident code point."
+  *
+  * Caution! No bounds checking is performed. Callers must make sure that `$offset`
+  * points inside of `$input`.
+  *
+  * > ident code point
+  * >   An ident-start code point, a digit, or U+002D HYPHEN-MINUS (-).
+  * > digit
+  * >   A code point between U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) inclusive.
+  *
+  * @link https://www.w3.org/TR/css-syntax-3/#ident-code-point
+  *
+  * @param string $input  The input string.
+  * @param int    $offset The byte offset in the string.
+  * @return bool True if the next codepoint is an ident code point, otherwise false.
+  */
+ final protected static function is_ident_codepoint( string $input, int $offset ): bool {
+ 	$char = $input[ $offset ];
+
+ 	// U+002D HYPHEN-MINUS (-).
+ 	if ( '-' === $char ) {
+ 		return true;
+ 	}
+
+ 	// Digits U+0030 (0) through U+0039 (9).
+ 	if ( '0' <= $char && $char <= '9' ) {
+ 		return true;
+ 	}
+
+ 	// Everything else is decided by the ident-start rules.
+ 	return self::is_ident_start_codepoint( $input, $offset );
+ }
+
+ /**
+  * Determines whether the input at the given offset would start an ident sequence.
+  *
+  * This follows "4.3.9. Check if three code points would start an ident sequence"
+  * from CSS Syntax Level 3 and does not consume any input. The decision depends
+  * on the first code point:
+  *
+  *  - U+002D HYPHEN-MINUS (-): true when the second code point is an ident-start
+  *    code point or another U+002D HYPHEN-MINUS, or when the second and third code
+  *    points are a valid escape; otherwise false.
+  *  - ident-start code point: true.
+  *  - U+005C REVERSE SOLIDUS (\): true when the first and second code points are
+  *    a valid escape; otherwise false.
+  *  - anything else: false.
+  *
+  * An offset at or beyond the end of the input yields false, as does a
+  * HYPHEN-MINUS that is the final byte of the input.
+  *
+  * @link https://www.w3.org/TR/css-syntax-3/#would-start-an-identifier
+  *
+  * @param string $input  The input string.
+  * @param int    $offset The byte offset in the string.
+  * @return bool True if the next three codepoints would start an ident sequence, otherwise false.
+  */
+ final protected static function check_if_three_code_points_would_start_an_ident_sequence( string $input, int $offset ): bool {
+ 	$length = strlen( $input );
+ 	if ( $offset >= $length ) {
+ 		return false;
+ 	}
+
+ 	$first = $input[ $offset ];
+
+ 	// > U+005C REVERSE SOLIDUS (\)
+ 	// > If the first and second code points are a valid escape, return true. Otherwise, return false.
+ 	if ( '\\' === $first ) {
+ 		return self::next_two_are_valid_escape( $input, $offset );
+ 	}
+
+ 	// > ident-start code point: return true.
+ 	// > anything else: return false.
+ 	if ( '-' !== $first ) {
+ 		return self::is_ident_start_codepoint( $input, $offset );
+ 	}
+
+ 	// > U+002D HYPHEN-MINUS
+ 	$second_offset = $offset + 1;
+ 	if ( $second_offset >= $length ) {
+ 		return false;
+ 	}
+
+ 	/*
+ 	 * > If the second code point is an ident-start code point or a U+002D HYPHEN-MINUS,
+ 	 * > or the second and third code points are a valid escape, return true.
+ 	 * > Otherwise, return false.
+ 	 */
+ 	return (
+ 		'-' === $input[ $second_offset ] ||
+ 		self::next_two_are_valid_escape( $input, $second_offset ) ||
+ 		self::is_ident_start_codepoint( $input, $second_offset )
+ 	);
+ }
+
+ /**
+  * Prepares a selector string for parsing.
+  *
+  * Surrounding whitespace is removed, every newline form (CRLF, CR, and FF) becomes
+  * a single LF, and U+0000 NULL bytes become U+FFFD REPLACEMENT CHARACTER.
+  *
+  * @see https://www.w3.org/TR/css-syntax-3/#input-preprocessing
+  *
+  * @param string $input The selector string.
+  * @return string The normalized selector string.
+  */
+ final protected static function normalize_selector_input( string $input ): string {
+ 	/*
+ 	 * > A selector string is a list of one or more complex selectors ([SELECTORS4], section 3.1) that may be surrounded by whitespace…
+ 	 *
+ 	 * Form feed (\f) counts as whitespace here. The newline normalization below
+ 	 * would turn it into a line feed anyway, so it is safe to trim it directly.
+ 	 */
+ 	$trimmed = trim( $input, " \t\r\n\f" );
+
+ 	/*
+ 	 * > To filter code points from a stream of (unfiltered) code points input:
+ 	 * >   Replace any U+000D CARRIAGE RETURN (CR) code points, U+000C FORM FEED (FF) code points, or pairs of U+000D CARRIAGE RETURN (CR) followed by U+000A LINE FEED (LF) in input by a single U+000A LINE FEED (LF) code point.
+ 	 * >   Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT CHARACTER (�).
+ 	 *
+ 	 * https://www.w3.org/TR/css-syntax-3/#input-preprocessing
+ 	 *
+ 	 * The search values are applied in order, so CRLF pairs collapse to one
+ 	 * line feed before any remaining lone CR or FF is replaced.
+ 	 */
+ 	$with_line_feeds = str_replace( array( "\r\n", "\r", "\f" ), "\n", $trimmed );
+
+ 	// Only U+0000 NULL is replaced here.
+ 	return str_replace( "\0", "\u{FFFD}", $with_line_feeds );
+ }
+}
diff --git a/src/wp-includes/html-api/css/class-wp-css-type-selector.php b/src/wp-includes/html-api/css/class-wp-css-type-selector.php
new file mode 100644
index 0000000000000..c7c7baa2d5508
--- /dev/null
+++ b/src/wp-includes/html-api/css/class-wp-css-type-selector.php
@@ -0,0 +1,90 @@
+type = $type;
+ }
+
+ /**
+  * Reports whether the tag at the processor's current position satisfies this selector.
+  *
+  * @param WP_HTML_Tag_Processor $processor The processor.
+  * @return bool True when the processor reports a tag name and that name matches
+  *              the selector, otherwise false.
+  */
+ public function matches( WP_HTML_Tag_Processor $processor ): bool {
+ 	$current_tag = $processor->get_tag();
+
+ 	// Without a tag name there is nothing for a type selector to match.
+ 	return null !== $current_tag && $this->matches_tag( $current_tag );
+ }
+
+ /**
+  * Compares a tag name against this selector's type.
+  *
+  * The universal selector (`*`) matches every tag name. Any other type is compared
+  * using `strcasecmp()`, so differences in letter case are ignored.
+  *
+  * @param string $tag_name The tag name to test.
+  * @return bool True if the tag name matches the selector, otherwise false.
+  */
+ public function matches_tag( string $tag_name ): bool {
+ 	$is_universal = '*' === $this->type;
+
+ 	return $is_universal || 0 === strcasecmp( $tag_name, $this->type );
+ }
+
+ /**
+  * Parses a type selector from the input, starting at the given offset.
+  *
+  * A `*` at the offset produces the universal selector and moves the offset past it.
+  * Anything else is handed to the ident parser, and the parsed ident becomes the type.
+  *
+  * To create an instance of this class, use the {@see WP_CSS_Compound_Selector_List::from_selectors()} method.
+  *
+  * @param string $input  The selector string.
+  * @param int    $offset The offset into the string. The offset is passed by reference and
+  *                       will be updated if the parse is successful.
+  * @return static|null The selector instance, or null if the parse was unsuccessful.
+  */
+ public static function parse( string $input, int &$offset ) {
+ 	// Nothing remains to be parsed.
+ 	if ( strlen( $input ) <= $offset ) {
+ 		return null;
+ 	}
+
+ 	// Universal selector.
+ 	if ( '*' === $input[ $offset ] ) {
+ 		++$offset;
+ 		return new WP_CSS_Type_Selector( '*' );
+ 	}
+
+ 	$ident = self::parse_ident( $input, $offset );
+
+ 	return null === $ident ? null : new self( $ident );
+ }
+}
diff --git a/src/wp-settings.php b/src/wp-settings.php
index 60ffc307c5f6e..9337af05da3ae 100644
--- a/src/wp-settings.php
+++ b/src/wp-settings.php
@@ -266,6 +266,15 @@
require ABSPATH . WPINC . '/html-api/class-wp-html-stack-event.php';
require ABSPATH . WPINC . '/html-api/class-wp-html-processor-state.php';
require ABSPATH . WPINC . '/html-api/class-wp-html-processor.php';
+require ABSPATH . WPINC . '/html-api/css/class-wp-css-selector-parser-matcher.php';
+require ABSPATH . WPINC . '/html-api/css/class-wp-css-attribute-selector.php';
+require ABSPATH . WPINC . '/html-api/css/class-wp-css-class-selector.php';
+require ABSPATH . WPINC . '/html-api/css/class-wp-css-id-selector.php';
+require ABSPATH . WPINC . '/html-api/css/class-wp-css-type-selector.php';
+require ABSPATH . WPINC . '/html-api/css/class-wp-css-compound-selector.php';
+require ABSPATH . WPINC . '/html-api/css/class-wp-css-complex-selector.php';
+require ABSPATH . WPINC . '/html-api/css/class-wp-css-compound-selector-list.php';
+require ABSPATH . WPINC . '/html-api/css/class-wp-css-complex-selector-list.php';
require ABSPATH . WPINC . '/class-wp-http.php';
require ABSPATH . WPINC . '/class-wp-http-streams.php';
require ABSPATH . WPINC . '/class-wp-http-curl.php';
diff --git a/tests/phpunit/tests/html-api/wpCssAttributeSelector.php b/tests/phpunit/tests/html-api/wpCssAttributeSelector.php
new file mode 100644
index 0000000000000..e574cedd1876b
--- /dev/null
+++ b/tests/phpunit/tests/html-api/wpCssAttributeSelector.php
@@ -0,0 +1,90 @@
+assertNull( $result );
+ } else {
+ $this->assertSame( $expected_name, $result->name );
+ $this->assertSame( $expected_matcher, $result->matcher );
+ $this->assertSame( $expected_value, $result->value );
+ $this->assertSame( $expected_modifier, $result->modifier );
+ $this->assertSame( $rest, substr( $input, $offset ) );
+ }
+ }
+
+ /**
+ * Data provider.
+ *
+ * Each dataset starts with the selector input. When more entries follow they
+ * are, in order: the attribute name, the matcher, the value, the modifier, and
+ * the unparsed remainder of the input. Datasets that contain only the input are
+ * listed under "Invalid".
+ *
+ * NOTE(review): the consuming test's signature is not visible in this chunk;
+ * the entry order above is read from the rows themselves — confirm against the test.
+ *
+ * @return array
+ */
+ public static function data_attribute_selectors(): array {
+ return array(
+ '[href]' => array( '[href]', 'href', null, null, null, '' ),
+ '[href] type' => array( '[href] type', 'href', null, null, null, ' type' ),
+ '[href]#id' => array( '[href]#id', 'href', null, null, null, '#id' ),
+ '[href].class' => array( '[href].class', 'href', null, null, null, '.class' ),
+ '[href][href2]' => array( '[href][href2]', 'href', null, null, null, '[href2]' ),
+ '[\n href\t\r]' => array( "[\n href\t\r]", 'href', null, null, null, '' ),
+ '[href=foo]' => array( '[href=foo]', 'href', WP_CSS_Attribute_Selector::MATCH_EXACT, 'foo', null, '' ),
+ '[href \n = bar ]' => array( "[href \n = bar ]", 'href', WP_CSS_Attribute_Selector::MATCH_EXACT, 'bar', null, '' ),
+ '[href \n ^= baz ]' => array( "[href \n ^= baz ]", 'href', WP_CSS_Attribute_Selector::MATCH_PREFIXED_BY, 'baz', null, '' ),
+
+ // Case-sensitivity modifiers, in both lower and upper case.
+ '[match $= insensitive i]' => array( '[match $= insensitive i]', 'match', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'insensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ),
+ '[match|=sensitive s]' => array( '[match|=sensitive s]', 'match', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_HYPHEN_SUFFIXED, 'sensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ),
+ '[att=val I]' => array( '[att=val I]', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'val', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ),
+ '[att=val S]' => array( '[att=val S]', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'val', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ),
+
+ // Quoted values.
+ '[match~="quoted[][]"]' => array( '[match~="quoted[][]"]', 'match', WP_CSS_Attribute_Selector::MATCH_ONE_OF_EXACT, 'quoted[][]', null, '' ),
+ "[match$='quoted!{}']" => array( "[match$='quoted!{}']", 'match', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'quoted!{}', null, '' ),
+ "[match*='quoted's]" => array( "[match*='quoted's]", 'match', WP_CSS_Attribute_Selector::MATCH_CONTAINS, 'quoted', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ),
+
+ // Escapes inside quoted values.
+ '[escape-nl="foo\\nbar"]' => array( "[escape-nl='foo\\\nbar']", 'escape-nl', WP_CSS_Attribute_Selector::MATCH_EXACT, 'foobar', null, '' ),
+ '[escape-seq="\\31 23"]' => array( "[escape-seq='\\31 23']", 'escape-seq', WP_CSS_Attribute_Selector::MATCH_EXACT, '123', null, '' ),
+
+ // Invalid
+ 'Invalid: (empty string)' => array( '' ),
+ 'Invalid: foo' => array( 'foo' ),
+ 'Invalid: [foo' => array( '[foo' ),
+ 'Invalid: [#foo]' => array( '[#foo]' ),
+ 'Invalid: [*|*]' => array( '[*|*]' ),
+ 'Invalid: [ns|*]' => array( '[ns|*]' ),
+ 'Invalid: [* |att]' => array( '[* |att]' ),
+ 'Invalid: [*| att]' => array( '[*| att]' ),
+ 'Invalid: [att * =]' => array( '[att * =]' ),
+ 'Invalid: [att+=val]' => array( '[att+=val]' ),
+ 'Invalid: [att=val ' => array( '[att=val ' ),
+ 'Invalid: [att i]' => array( '[att i]' ),
+ 'Invalid: [att s]' => array( '[att s]' ),
+ "Invalid: [att='val\\n']" => array( "[att='val\n']" ),
+ 'Invalid: [att=val i ' => array( '[att=val i ' ),
+ 'Invalid: [att="val"ix' => array( '[att="val"ix' ),
+ );
+ }
+}
diff --git a/tests/phpunit/tests/html-api/wpCssClassSelector.php b/tests/phpunit/tests/html-api/wpCssClassSelector.php
new file mode 100644
index 0000000000000..9646d05da23d5
--- /dev/null
+++ b/tests/phpunit/tests/html-api/wpCssClassSelector.php
@@ -0,0 +1,49 @@
+assertNull( $result );
+ } else {
+ $this->assertSame( $expected, $result->class_name );
+ $this->assertSame( $rest, substr( $input, $offset ) );
+ }
+ }
+
+ /**
+ * Data provider.
+ *
+ * Each dataset starts with the selector input. When more entries follow they
+ * are the class name and the unparsed remainder of the input. Datasets that
+ * contain only the input carry no expected class name.
+ *
+ * NOTE(review): the consuming test's signature is not visible in this chunk;
+ * confirm the entry order against the test.
+ *
+ * @return array
+ */
+ public static function data_class_selectors(): array {
+ return array(
+ 'valid ._-foo123' => array( '._-foo123', '_-foo123', '' ),
+ 'valid .foo.bar' => array( '.foo.bar', 'foo', '.bar' ),
+ 'escaped .\31 23' => array( '.\\31 23', '123', '' ),
+ 'with descendant .\31 23 div' => array( '.\\31 23 div', '123', ' div' ),
+
+ // Invalid
+ 'not class foo' => array( 'foo' ),
+ 'not class #bar' => array( '#bar' ),
+ 'not valid .1foo' => array( '.1foo' ),
+ );
+ }
+}
diff --git a/tests/phpunit/tests/html-api/wpCssComplexSelector.php b/tests/phpunit/tests/html-api/wpCssComplexSelector.php
new file mode 100644
index 0000000000000..8738bb6fc32d2
--- /dev/null
+++ b/tests/phpunit/tests/html-api/wpCssComplexSelector.php
@@ -0,0 +1,71 @@
+ .child#bar[baz=quux] , rest';
+ $offset = 0;
+
+ /** @var WP_CSS_Complex_Selector|null */
+ $sel = WP_CSS_Complex_Selector::parse( $input, $offset );
+
+ $this->assertSame( 2, count( $sel->context_selectors ) );
+
+ // Relative selectors should be reverse ordered.
+ $this->assertSame( 'el2', $sel->context_selectors[0][0]->type );
+ $this->assertSame( WP_CSS_Complex_Selector::COMBINATOR_CHILD, $sel->context_selectors[0][1] );
+
+ $this->assertSame( 'el1', $sel->context_selectors[1][0]->type );
+ $this->assertSame( WP_CSS_Complex_Selector::COMBINATOR_DESCENDANT, $sel->context_selectors[1][1] );
+
+ $this->assertSame( 3, count( $sel->self_selector->subclass_selectors ) );
+ $this->assertNull( $sel->self_selector->type_selector );
+ $this->assertSame( 'child', $sel->self_selector->subclass_selectors[0]->class_name );
+
+ $this->assertSame( ', rest', substr( $input, $offset ) );
+ }
+
+ /**
+  * Verifies that parsing returns null when a child combinator (`>`) is followed
+  * directly by a comma.
+  *
+  * @ticket 62653
+  */
+ public function test_parse_invalid_complex_selector() {
+ 	$selector_string = 'el.foo#bar[baz=quux] > , rest';
+ 	$position        = 0;
+
+ 	$this->assertNull( WP_CSS_Complex_Selector::parse( $selector_string, $position ) );
+ }
+
+ /**
+  * Verifies that parsing returns null when subclass selectors (`.foo#bar[baz=quux]`)
+  * appear before a combinator instead of in the final compound selector.
+  *
+  * @ticket 62653
+  */
+ public function test_parse_invalid_complex_selector_nonfinal_subclass() {
+ 	$selector_string = 'el.foo#bar[baz=quux] > final, rest';
+ 	$position        = 0;
+
+ 	$this->assertNull( WP_CSS_Complex_Selector::parse( $selector_string, $position ) );
+ }
+
+ /**
+  * Verifies that parsing an empty string returns null.
+  *
+  * @ticket 62653
+  */
+ public function test_parse_empty_complex_selector() {
+ 	$selector_string = '';
+ 	$position        = 0;
+
+ 	$this->assertNull( WP_CSS_Complex_Selector::parse( $selector_string, $position ) );
+ }
+}
diff --git a/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php b/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php
new file mode 100644
index 0000000000000..edf912e97f490
--- /dev/null
+++ b/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php
@@ -0,0 +1,51 @@
+ selector';
+ $result = WP_CSS_Complex_Selector_List::from_selectors( $input );
+ $this->assertNotNull( $result );
+ }
+
+ /**
+  * Verifies that a selector list with nothing between or after its commas is rejected.
+  *
+  * @ticket 62653
+  */
+ public function test_parse_invalid_selector_list() {
+ 	$selectors = 'el,,';
+
+ 	$this->assertNull( WP_CSS_Complex_Selector_List::from_selectors( $selectors ) );
+ }
+
+ /**
+  * Verifies that a selector followed by a stray `!` is rejected.
+  *
+  * @ticket 62653
+  */
+ public function test_parse_invalid_selector_list2() {
+ 	$selectors = 'el!';
+
+ 	$this->assertNull( WP_CSS_Complex_Selector_List::from_selectors( $selectors ) );
+ }
+
+ /**
+  * Verifies that input made up only of spaces, tabs, newlines, carriage returns,
+  * and form feeds yields no selector list.
+  *
+  * @ticket 62653
+  */
+ public function test_parse_empty_selector_list() {
+ 	$whitespace_only = " \t \t\n\r\f";
+
+ 	$this->assertNull( WP_CSS_Complex_Selector_List::from_selectors( $whitespace_only ) );
+ }
+}
diff --git a/tests/phpunit/tests/html-api/wpCssCompoundSelector.php b/tests/phpunit/tests/html-api/wpCssCompoundSelector.php
new file mode 100644
index 0000000000000..8092ee049b6e1
--- /dev/null
+++ b/tests/phpunit/tests/html-api/wpCssCompoundSelector.php
@@ -0,0 +1,44 @@
+ .child';
+ $offset = 0;
+ $sel = WP_CSS_Compound_Selector::parse( $input, $offset );
+
+ $this->assertSame( 'el', $sel->type_selector->type );
+ $this->assertSame( 3, count( $sel->subclass_selectors ) );
+ $this->assertSame( 'foo', $sel->subclass_selectors[0]->class_name, 'foo' );
+ $this->assertSame( 'bar', $sel->subclass_selectors[1]->id, 'bar' );
+ $this->assertSame( 'baz', $sel->subclass_selectors[2]->name, 'baz' );
+ $this->assertSame( WP_CSS_Attribute_Selector::MATCH_EXACT, $sel->subclass_selectors[2]->matcher );
+ $this->assertSame( 'quux', $sel->subclass_selectors[2]->value );
+ $this->assertSame( ' > .child', substr( $input, $offset ) );
+ }
+
+ /**
+  * Verifies that parsing an empty string returns null and leaves the offset at 0.
+  *
+  * @ticket 62653
+  */
+ public function test_parse_empty_selector() {
+ 	$selector_string = '';
+ 	$position        = 0;
+
+ 	$parsed = WP_CSS_Compound_Selector::parse( $selector_string, $position );
+
+ 	$this->assertNull( $parsed );
+ 	$this->assertSame( 0, $position );
+ }
+}
diff --git a/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php
new file mode 100644
index 0000000000000..8f1d3dfb88a45
--- /dev/null
+++ b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php
@@ -0,0 +1,60 @@
+assertNotNull( $result );
+ }
+
+ /**
+  * Verifies that a selector list with nothing between or after its commas is rejected.
+  *
+  * @ticket 62653
+  */
+ public function test_parse_invalid_selector_list() {
+ 	$selectors = 'el,,';
+
+ 	$this->assertNull( WP_CSS_Compound_Selector_List::from_selectors( $selectors ) );
+ }
+
+ /**
+  * Verifies that a selector followed by a stray `!` is rejected.
+  *
+  * @ticket 62653
+  */
+ public function test_parse_invalid_selector_list2() {
+ 	$selectors = 'el!';
+
+ 	$this->assertNull( WP_CSS_Compound_Selector_List::from_selectors( $selectors ) );
+ }
+
+ /**
+  * Verifies that input made up only of spaces, tabs, newlines, carriage returns,
+  * and form feeds yields no selector list.
+  *
+  * @ticket 62653
+  */
+ public function test_parse_empty_selector_list() {
+ 	$whitespace_only = " \t \t\n\r\f";
+
+ 	$this->assertNull( WP_CSS_Compound_Selector_List::from_selectors( $whitespace_only ) );
+ }
+
+ /**
+  * Verifies that a selector using a descendant combinator is rejected by the
+  * compound selector list.
+  *
+  * @ticket 62653
+  */
+ public function test_unsupported_complex_selector() {
+ 	$selectors = 'ancestor descendant';
+
+ 	$this->assertNull( WP_CSS_Compound_Selector_List::from_selectors( $selectors ) );
+ }
+}
diff --git a/tests/phpunit/tests/html-api/wpCssIdSelector.php b/tests/phpunit/tests/html-api/wpCssIdSelector.php
new file mode 100644
index 0000000000000..6dc2e5461ea03
--- /dev/null
+++ b/tests/phpunit/tests/html-api/wpCssIdSelector.php
@@ -0,0 +1,50 @@
+assertNull( $result );
+ } else {
+ $this->assertSame( $expected, $result->id );
+ $this->assertSame( $rest, substr( $input, $offset ) );
+ }
+ }
+
+ /**
+ * Data provider.
+ *
+ * Each dataset starts with the selector input. When more entries follow they
+ * are the ID and the unparsed remainder of the input. Datasets that contain
+ * only the input are listed under "Invalid".
+ *
+ * NOTE(review): the consuming test's signature is not visible in this chunk;
+ * confirm the entry order against the test.
+ *
+ * @return array
+ */
+ public static function data_id_selectors(): array {
+ return array(
+ 'valid #_-foo123' => array( '#_-foo123', '_-foo123', '' ),
+ 'valid #foo#bar' => array( '#foo#bar', 'foo', '#bar' ),
+ 'escaped #\31 23' => array( '#\\31 23', '123', '' ),
+ 'with descendant #\31 23 div' => array( '#\\31 23 div', '123', ' div' ),
+
+ // Invalid
+ 'not ID foo' => array( 'foo' ),
+ 'not ID .bar' => array( '.bar' ),
+ 'not valid #1foo' => array( '#1foo' ),
+ );
+ }
+}
diff --git a/tests/phpunit/tests/html-api/wpCssSelectorParserMatcher.php b/tests/phpunit/tests/html-api/wpCssSelectorParserMatcher.php
new file mode 100644
index 0000000000000..29372172da2b1
--- /dev/null
+++ b/tests/phpunit/tests/html-api/wpCssSelectorParserMatcher.php
@@ -0,0 +1,179 @@
+test_class = new class() extends WP_CSS_Selector_Parser_Matcher {
+ public function matches( $processor ): bool {
+ throw new Error( 'Matches called on test class.' );
+ }
+ public static function parse( string $input, int &$offset ) {
+ throw new Error( 'Parse called on test class.' );
+ }
+
+ /*
+ * Parsing
+ */
+ public static function test_parse_ident( string $input, int &$offset ) {
+ return self::parse_ident( $input, $offset );
+ }
+
+ public static function test_parse_string( string $input, int &$offset ) {
+ return self::parse_string( $input, $offset );
+ }
+
+ /*
+ * Utilities
+ */
+ public static function test_is_ident_codepoint( string $input, int $offset ) {
+ return self::is_ident_codepoint( $input, $offset );
+ }
+
+ public static function test_is_ident_start_codepoint( string $input, int $offset ) {
+ return self::is_ident_start_codepoint( $input, $offset );
+ }
+ };
+ }
+
+ /**
+  * Data provider.
+  *
+  * Each dataset is `array( $input, $expected, $rest )`, matching the parameters of
+  * `test_parse_ident()`. Datasets that contain only the input leave `$expected` at
+  * its null default, which the test treats as "parsing must return null".
+  *
+  * @return array
+  */
+ public static function data_idents(): array {
+ 	return array(
+ 		'trailing #' => array( '_-foo123#xyz', '_-foo123', '#xyz' ),
+ 		'trailing .' => array( '😍foo123.xyz', '😍foo123', '.xyz' ),
+ 		'trailing " "' => array( '😍foo123 more', '😍foo123', ' more' ),
+ 		'escaped ASCII character' => array( '\\xyz', 'xyz', '' ),
+ 		'escaped space' => array( '\\ x', ' x', '' ),
+ 		'escaped emoji' => array( '\\😍', '😍', '' ),
+ 		'hex unicode codepoint' => array( '\\1f0a1', '🂡', '' ),
+ 		'HEX UNICODE CODEPOINT' => array( '\\1D4B2', '𝒲', '' ),
+
+ 		'hex tab-suffixed 1' => array( "\\31\t23", '123', '' ),
+ 		'hex newline-suffixed 1' => array( "\\31\n23", '123', '' ),
+ 		'hex space-suffixed 1' => array( "\\31 23", '123', '' ),
+ 		'hex tab' => array( '\\9', "\t", '' ),
+ 		'hex a' => array( '\\61 bc', 'abc', '' ),
+ 		'hex a max escape length' => array( '\\000061bc', 'abc', '' ),
+
+ 		'out of range replacement min' => array( '\\110000 ', "\u{fffd}", '' ),
+ 		'out of range replacement max' => array( '\\ffffff ', "\u{fffd}", '' ),
+ 		'leading surrogate min replacement' => array( '\\d800 ', "\u{fffd}", '' ),
+ 		'leading surrogate max replacement' => array( '\\dbff ', "\u{fffd}", '' ),
+ 		'trailing surrogate min replacement' => array( '\\dc00 ', "\u{fffd}", '' ),
+ 		'trailing surrogate max replacement' => array( '\\dfff ', "\u{fffd}", '' ),
+ 		'can start with -ident' => array( '-ident', '-ident', '' ),
+ 		'can start with --anything' => array( '--anything', '--anything', '' ),
+ 		// The label names the actual input so a failure report points at the right case.
+ 		'can start with --_anything' => array( '--_anything', '--_anything', '' ),
+ 		'can start with --1anything' => array( '--1anything', '--1anything', '' ),
+ 		'can start with -\31 23' => array( '-\31 23', '-123', '' ),
+ 		'can start with --\31 23' => array( '--\31 23', '--123', '' ),
+ 		'ident ends before ]' => array( 'ident]', 'ident', ']' ),
+
+ 		// Invalid
+ 		'Invalid: (empty string)' => array( '' ),
+ 		'Invalid: bad start >' => array( '>ident' ),
+ 		'Invalid: bad start [' => array( '[ident' ),
+ 		'Invalid: bad start #' => array( '#ident' ),
+ 		'Invalid: bad start " "' => array( ' ident' ),
+ 		'Invalid: bad start 1' => array( '1ident' ),
+ 		'Invalid: bad start -1' => array( '-1ident' ),
+ 		'Invalid: bad start -' => array( '-' ),
+ 	);
+ }
+
+ /**
+  * Verifies that square brackets are neither ident code points nor ident-start
+  * code points.
+  *
+  * @ticket 62653
+  */
+ public function test_is_ident_and_is_ident_start() {
+ 	$brackets = array( '[', ']' );
+
+ 	foreach ( $brackets as $bracket ) {
+ 		$this->assertFalse( $this->test_class::test_is_ident_codepoint( $bracket, 0 ) );
+ 	}
+
+ 	foreach ( $brackets as $bracket ) {
+ 		$this->assertFalse( $this->test_class::test_is_ident_start_codepoint( $bracket, 0 ) );
+ 	}
+ }
+
+ /**
+  * Verifies ident parsing against the `data_idents` datasets.
+  *
+  * When no expected ident is supplied the parse must return null. Otherwise the
+  * parsed ident must equal `$expected` and the input remaining after the updated
+  * offset must equal `$rest`.
+  *
+  * @ticket 62653
+  *
+  * @dataProvider data_idents
+  */
+ public function test_parse_ident( string $input, ?string $expected = null, ?string $rest = null ) {
+ 	$position = 0;
+ 	$ident    = $this->test_class::test_parse_ident( $input, $position );
+
+ 	if ( null === $expected ) {
+ 		$this->assertNull( $ident );
+ 		return;
+ 	}
+
+ 	$this->assertSame( $expected, $ident, 'Ident did not match.' );
+ 	$this->assertSame( $rest, substr( $input, $position ), 'Offset was not updated correctly.' );
+ }
+
+ /**
+  * Verifies string parsing against the `data_strings` datasets.
+  *
+  * When no expected string is supplied the parse must return null. Otherwise the
+  * parsed string must equal `$expected` and the input remaining after the updated
+  * offset must equal `$rest`.
+  *
+  * @ticket 62653
+  *
+  * @dataProvider data_strings
+  */
+ public function test_parse_string( string $input, ?string $expected = null, ?string $rest = null ) {
+ 	$position = 0;
+ 	$parsed   = $this->test_class::test_parse_string( $input, $position );
+
+ 	if ( null === $expected ) {
+ 		$this->assertNull( $parsed );
+ 		return;
+ 	}
+
+ 	$this->assertSame( $expected, $parsed, 'String did not match.' );
+ 	$this->assertSame( $rest, substr( $input, $position ), 'Offset was not updated correctly.' );
+ }
+
+ /**
+ * Data provider.
+ *
+ * Each dataset is `array( $input, $expected, $rest )`, matching the parameters
+ * of `test_parse_string()`. Datasets that contain only the input leave
+ * `$expected` at its null default, which the test treats as "parsing must
+ * return null".
+ *
+ * @return array
+ */
+ public static function data_strings(): array {
+ return array(
+ // Double-quoted strings.
+ '"foo"' => array( '"foo"', 'foo', '' ),
+ '"foo"after' => array( '"foo"after', 'foo', 'after' ),
+ '"foo""two"' => array( '"foo""two"', 'foo', '"two"' ),
+ '"foo"\'two\'' => array( '"foo"\'two\'', 'foo', "'two'" ),
+
+ // Single-quoted strings.
+ "'foo'" => array( "'foo'", 'foo', '' ),
+ "'foo'after" => array( "'foo'after", 'foo', 'after' ),
+ "'foo'\"two\"" => array( "'foo'\"two\"", 'foo', '"two"' ),
+ "'foo''two'" => array( "'foo''two'", 'foo', "'two'" ),
+
+ // Escapes inside strings.
+ "'foo\\nbar'" => array( "'foo\\\nbar'", 'foobar', '' ),
+ "'foo\\31 23'" => array( "'foo\\31 23'", 'foo123', '' ),
+ "'foo\\31\\n23'" => array( "'foo\\31\n23'", 'foo123', '' ),
+ "'foo\\31\\t23'" => array( "'foo\\31\t23'", 'foo123', '' ),
+ "'foo\\00003123'" => array( "'foo\\00003123'", 'foo123', '' ),
+
+ // Input that ends right after a backslash still yields the text before it.
+ "'foo\\" => array( "'foo\\", 'foo', '' ),
+
+ // Strings without a closing quote are expected to parse up to the end of the input.
+ '"' => array( '"', '', '' ),
+ '"\\"' => array( '"\\"', '"', '' ),
+ '"missing close' => array( '"missing close', 'missing close', '' ),
+
+ // Invalid
+ 'Invalid: (empty string)' => array( '' ),
+ 'Invalid: .foo' => array( '.foo' ),
+ 'Invalid: #foo' => array( '#foo' ),
+ "Invalid: 'newline\\n'" => array( "'newline\n'" ),
+ 'Invalid: foo' => array( 'foo' ),
+ );
+ }
+}
diff --git a/tests/phpunit/tests/html-api/wpCssTypeSelector.php b/tests/phpunit/tests/html-api/wpCssTypeSelector.php
new file mode 100644
index 0000000000000..23d5f5517453a
--- /dev/null
+++ b/tests/phpunit/tests/html-api/wpCssTypeSelector.php
@@ -0,0 +1,51 @@
+assertNull( $result );
+ } else {
+ $this->assertSame( $expected, $result->type );
+ $this->assertSame( $rest, substr( $input, $offset ) );
+ }
+ }
+
+ /**
+ * Data provider.
+ *
+ * Each dataset starts with the selector input. When more entries follow they
+ * are the type and the unparsed remainder of the input. Datasets that contain
+ * only the input are listed under "Invalid".
+ *
+ * NOTE(review): the consuming test's signature is not visible in this chunk;
+ * confirm the entry order against the test.
+ *
+ * @return array
+ */
+ public static function data_type_selectors(): array {
+ return array(
+ 'any *' => array( '* .class', '*', ' .class' ),
+ 'a' => array( 'a', 'a', '' ),
+ 'div.class' => array( 'div.class', 'div', '.class' ),
+ 'custom-type#id' => array( 'custom-type#id', 'custom-type', '#id' ),
+
+ // Invalid
+ 'Invalid: (empty string)' => array( '' ),
+ 'Invalid: #id' => array( '#id' ),
+ 'Invalid: .class' => array( '.class' ),
+ 'Invalid: [attr]' => array( '[attr]' ),
+ );
+ }
+}
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php
new file mode 100644
index 0000000000000..003e65e69ebce
--- /dev/null
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php
@@ -0,0 +1,89 @@
+' );
+ $this->assertFalse( $processor->select( 'div' ) );
+ }
+
+ /**
+  * Verifies that repeated `select()` calls visit exactly the expected tags.
+  *
+  * Every tag that is selected must carry a boolean `match` attribute, and the
+  * number of selected tags must equal `$match_count`.
+  *
+  * @ticket 62653
+  *
+  * @dataProvider data_selectors
+  */
+ public function test_selects_all_matches( string $html, string $selector, int $match_count ) {
+ 	$processor = WP_HTML_Processor::create_full_parser( $html );
+ 	$selected  = 0;
+
+ 	while ( $processor->select( $selector ) ) {
+ 		$breadcrumbs = implode( ', ', $processor->get_breadcrumbs() );
+ 		$has_marker  = $processor->get_attribute( 'match' );
+
+ 		$this->assertTrue( $has_marker, "Matched unexpected tag {$processor->get_tag()} @ {$breadcrumbs}" );
+ 		++$selected;
+ 	}
+
+ 	$this->assertSame( $match_count, $selected, 'Did not match expected number of tags.' );
+ }
+
+ /**
+ * Data provider.
+ *
+ * @return array
+ */
+ public static function data_selectors(): array {
+ return array(
+ 'any' => array( '', '*', 5 ),
+ 'quirks mode ID' => array( '
In quirks mode, ID matching is case-insensitive.', '#id', 2 ),
+ 'quirks mode class' => array( '
In quirks mode, class matching is case-insensitive.', '.c', 2 ),
+ 'no-quirks mode ID' => array( '
In no-quirks mode, ID matching is case-sensitive.', '#id', 1 ),
+ 'no-quirks mode class' => array( '
In no-quirks mode, class matching is case-sensitive.', '.c', 1 ),
+ 'any descendant' => array( '', 'section *', 4 ),
+ 'any child matches all children' => array( '', 'section > *', 2 ),
+
+ 'multiple complex selectors' => array( '', 'section > div p > i', 1 ),
+ );
+ }
+
+ /**
+  * Verifies that `select()` returns false for invalid or unsupported selectors.
+  *
+  * @ticket 62653
+  *
+  * @expectedIncorrectUsage WP_HTML_Processor::select
+  *
+  * @dataProvider data_invalid_selectors
+  */
+ public function test_invalid_selector( string $selector ) {
+ 	$processor = WP_HTML_Processor::create_fragment( 'irrelevant' );
+ 	$selected  = $processor->select( $selector );
+
+ 	$this->assertFalse( $selected );
+ }
+
+ /**
+ * Data provider.
+ *
+ * Each dataset holds a single selector string that `test_invalid_selector()`
+ * expects `select()` to return false for.
+ *
+ * @return array
+ */
+ public static function data_invalid_selectors(): array {
+ return array(
+ 'invalid selector' => array( '[invalid!selector]' ),
+
+ // The class selectors below are not allowed in non-final position.
+ 'unsupported child selector' => array( '.parent > .child' ),
+ 'unsupported descendant selector' => array( '.ancestor .descendant' ),
+
+ // Unsupported combinators
+ 'unsupported next sibling selector' => array( 'p + p' ),
+ 'unsupported subsequent sibling selector' => array( 'p ~ p' ),
+ );
+ }
+}
diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php
new file mode 100644
index 0000000000000..1d09c61b4760d
--- /dev/null
+++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php
@@ -0,0 +1,107 @@
+' );
+ $this->assertFalse( $processor->select( 'div' ) );
+ }
+
+ /**
+  * Verifies that repeated `select()` calls visit exactly the expected tags.
+  *
+  * Every tag that is selected must carry a boolean `match` attribute, and the
+  * number of selected tags must equal `$match_count`.
+  *
+  * @ticket 62653
+  *
+  * @dataProvider data_selectors
+  */
+ public function test_select( string $html, string $selector, int $match_count ) {
+ 	$processor = new WP_HTML_Tag_Processor( $html );
+ 	$selected  = 0;
+
+ 	while ( $processor->select( $selector ) ) {
+ 		$has_marker = $processor->get_attribute( 'match' );
+
+ 		$this->assertTrue( $has_marker, "Matched unexpected tag {$processor->get_tag()}" );
+ 		++$selected;
+ 	}
+
+ 	$this->assertSame( $match_count, $selected, 'Did not match expected number of tags.' );
+ }
+
+ /**
+ * Data provider.
+ *
+ * @return array
+ */
+ public static function data_selectors(): array {
+ return array(
+ 'simple type' => array( '', 'div', 2 ),
+ 'any type' => array( '', '*', 2 ),
+ 'simple class' => array( '', '.x', 2 ),
+ 'simple id' => array( '', '#x', 2 ),
+
+ 'attribute presence' => array( '', '[att]', 2 ),
+ 'attribute empty string match' => array( '', '[att=""]', 2 ),
+ 'attribute value' => array( '', '[att=val]', 2 ),
+ 'attribute quoted value' => array( '
', '[att="::"]', 2 ),
+ 'attribute case insensitive' => array( '
', '[att="VAL"i]', 2 ),
+ 'attribute case sensitive mod' => array( '
', '[att="val"s]', 2 ),
+
+ 'attribute one of' => array( '
', '[att~="b"]', 3 ),
+ 'attribute one of insensitive' => array( '
', '[att~="b"i]', 1 ),
+ 'attribute one of mod sensitive' => array( '
', '[att~="b"s]', 1 ),
+ 'attribute one of whitespace cases' => array( "
", '[att~="b"]', 1 ),
+
+ 'attribute with-hyphen' => array( '
', '[att|="special"]', 2 ),
+ 'attribute with-hyphen insensitive' => array( '
', '[att|="special" i]', 2 ),
+ 'attribute with-hyphen sensitive mod' => array( '
', '[att|="special"s]', 1 ),
+
+ 'attribute prefixed' => array( '
', '[att^="p"]', 2 ),
+ 'attribute prefixed insensitive' => array( '
', '[att^="p"i]', 1 ),
+ 'attribute prefixed sensitive mod' => array( '
', '[att^="p"s]', 1 ),
+
+ 'attribute suffixed' => array( '
', '[att$="x"]', 2 ),
+ 'attribute suffixed insensitive' => array( '
', '[att$="x"i]', 1 ),
+ 'attribute suffixed sensitive mod' => array( '
', '[att$="x"s]', 1 ),
+
+ 'attribute contains' => array( '
', '[att*="x"]', 2 ),
+ 'attribute contains insensitive' => array( '
', '[att*="x"i]', 1 ),
+ 'attribute contains sensitive mod' => array( '
', '[att*="x"s]', 1 ),
+
+ 'list' => array( '
', 'a, p, .class, #id, [att]', 2 ),
+ 'compound' => array( '', 'custom-el[att="bar"][ fruit ~= "banana" i]', 1 ),
+ );
+ }
+
+ /**
+  * Verifies that `select()` returns false for invalid or unsupported selectors.
+  *
+  * @ticket 62653
+  *
+  * @expectedIncorrectUsage WP_HTML_Tag_Processor::select
+  *
+  * @dataProvider data_invalid_selectors
+  */
+ public function test_invalid_selector( string $selector ) {
+ 	$processor = new WP_HTML_Tag_Processor( 'irrelevant' );
+ 	$selected  = $processor->select( $selector );
+
+ 	$this->assertFalse( $selected );
+ }
+
+ /**
+ * Data provider.
+ *
+ * Each dataset holds a single selector string that `test_invalid_selector()`
+ * expects `select()` to return false for.
+ *
+ * @return array
+ */
+ public static function data_invalid_selectors(): array {
+ return array(
+ // Selectors that use a combinator.
+ 'complex descendant' => array( 'div *' ),
+ 'complex child' => array( 'div > *' ),
+ 'invalid selector' => array( '[invalid!selector]' ),
+ );
+ }
+}