Amet' );
+// $p->parse();
+/*
+Outputs:
+
+DOM after main loop:
+ HTML
+ ├─ UL
+ ├─ LI
+ └─ #text: 1
+ ├─ LI
+ └─ #text: 2
+ ├─ LI
+ └─ #text: 3
+ ├─ LI
+ ├─ #text: Lorem
+ └─ B
+ └─ #text: Ipsum
+ └─ LI
+ └─ B
+ └─ #text: Dolor
+ └─ B
+ └─ SPAN
+ ├─ #text: Sit
+ └─ SPAN
+ ├─ #text: Sit
+ └─ SPAN
+ └─ DIV
+ └─ #text: Amet
+*/
+
+$p = new WP_HTML_Processor( '
+
+' );
+$p->parse();
+// $p = new WP_HTML_Processor( '
123
' );
+// $p->parse();
+// /*
+// Outputs the correct result:
+// B
+// └─ #text: 1
+// P
+// ├─ B
+// └─ #text: 2
+// └─ #text: 3
+// */
+echo "\n\n";
+echo $p->reconstructed_html;
+die();
+
+$p = new WP_HTML_Processor( '
X
+
X
+
X
+
X' );
+$p->parse();
+/*
+DOM after main loop:
+ HTML
+ ├─ P
+ └─ B class="x"
+ └─ B class="x"
+ └─ B
+ └─ B class="x"
+ └─ B class="x"
+ └─ B
+ └─ #text: X
+ ├─ P
+ └─ B class="x"
+ └─ B
+ └─ B class="x"
+ └─ B class="x"
+ └─ B
+ └─ #text: X
+ ├─ P
+ └─ B class="x"
+ └─ B
+ └─ B class="x"
+ └─ B class="x"
+ └─ B
+ └─ B
+ └─ B class="x"
+ └─ B
+ └─ #text: X
+ └─ P
+ └─ #text: X
+*/
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 31db41a3c86ad..9aca0d6f28b85 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -274,7 +274,7 @@ class WP_HTML_Tag_Processor {
* @since 6.2.0
* @var string
*/
- private $html;
+ public $html;
/**
* The last query passed to next_tag().
@@ -343,7 +343,7 @@ class WP_HTML_Tag_Processor {
* @since 6.2.0
* @var int
*/
- private $bytes_already_parsed = 0;
+ protected $bytes_already_parsed = 0;
/**
* How many bytes from the input HTML document have already been
@@ -406,7 +406,7 @@ class WP_HTML_Tag_Processor {
* @since 6.2.0
* @var int|null
*/
- private $tag_ends_at;
+ protected $tag_ends_at;
/**
* Whether the current tag is an opening tag, e.g.
, or a closing tag, e.g.
.
@@ -528,6 +528,18 @@ class WP_HTML_Tag_Processor {
*/
protected $lexical_updates = array();
+ /**
+ * Attribute replacements to apply to input HTML document.
+ *
+ * Unlike more generic lexical updates, attribute updates are stored
+ * in an associative array, where the keys are (lowercase-normalized)
+ * attribute names, in order to avoid duplication.
+ *
+ * @since 6.3.0
+ * @var WP_HTML_Text_Replacement[]
+ */
+ private $attribute_updates = array();
+
/**
* Tracks and limits `seek()` calls to prevent accidental infinite loops.
*
@@ -712,7 +724,7 @@ public function set_bookmark( $name ) {
return false;
}
- if ( ! array_key_exists( $name, $this->bookmarks ) && count( $this->bookmarks ) >= self::MAX_BOOKMARKS ) {
+ if ( ! array_key_exists( $name, $this->bookmarks ) && count( $this->bookmarks ) >= static::MAX_BOOKMARKS ) {
_doing_it_wrong(
__METHOD__,
__( 'Too many bookmarks: cannot create any more.' ),
@@ -722,7 +734,7 @@ public function set_bookmark( $name ) {
}
$this->bookmarks[ $name ] = new WP_HTML_Span(
- $this->tag_name_starts_at - 1,
+ $this->tag_name_starts_at - ( $this->is_closing_tag ? 2 : 1 ),
$this->tag_ends_at
);
@@ -1237,15 +1249,16 @@ private function skip_whitespace() {
}
/**
- * Applies attribute updates and cleans up once a tag is fully parsed.
+ * Applies lexical updates and cleans up once a tag is fully parsed.
*
* @since 6.2.0
*
* @return void
*/
private function after_tag() {
- $this->class_name_updates_to_attributes_updates();
- $this->apply_attributes_updates();
+ $this->class_name_updates_to_attribute_updates();
+ $this->attribute_updates_to_lexical_updates();
+ $this->apply_lexical_updates();
$this->tag_name_starts_at = null;
$this->tag_name_length = null;
$this->tag_ends_at = null;
@@ -1254,17 +1267,17 @@ private function after_tag() {
}
/**
- * Converts class name updates into tag attributes updates
+ * Converts class name updates into tag attribute updates
* (they are accumulated in different data formats for performance).
*
- * @see $lexical_updates
+ * @see $attribute_updates
* @see $classname_updates
*
* @since 6.2.0
*
* @return void
*/
- private function class_name_updates_to_attributes_updates() {
+ private function class_name_updates_to_attribute_updates() {
if ( count( $this->classname_updates ) === 0 ) {
return;
}
@@ -1398,13 +1411,33 @@ private function class_name_updates_to_attributes_updates() {
}
/**
- * Applies attribute updates to HTML document.
+ * Converts attribute updates into lexical updates.
+ *
+ * Only meant to run right before lexical updates are applied; otherwise behavior is undefined.
+ *
+ * @see $attribute_updates
+ * @see $lexical_updates
+ *
+ * @since 6.3.0
+ *
+ * @return void
+ */
+ private function attribute_updates_to_lexical_updates() {
+ foreach ( $this->attribute_updates as $update ) {
+ $this->lexical_updates[] = $update;
+ }
+ $this->attribute_updates = array();
+ }
+
+ /**
+ * Applies lexical updates to HTML document.
*
* @since 6.2.0
+ * @since 6.3.0 Invalidate any bookmarks whose targets are overwritten.
*
* @return void
*/
- private function apply_attributes_updates() {
+ private function apply_lexical_updates() {
if ( ! count( $this->lexical_updates ) ) {
return;
}
@@ -1431,7 +1464,7 @@ private function apply_attributes_updates() {
* Adjust bookmark locations to account for how the text
* replacements adjust offsets in the input document.
*/
- foreach ( $this->bookmarks as $bookmark ) {
+ foreach ( $this->bookmarks as $bookmark_name => $bookmark ) {
/*
* Each lexical update which appears before the bookmark's endpoints
* might shift the offsets for those endpoints. Loop through each change
@@ -1442,20 +1475,22 @@ private function apply_attributes_updates() {
$tail_delta = 0;
foreach ( $this->lexical_updates as $diff ) {
- $update_head = $bookmark->start >= $diff->start;
- $update_tail = $bookmark->end >= $diff->start;
-
- if ( ! $update_head && ! $update_tail ) {
+ if ( $bookmark->start < $diff->start && $bookmark->end < $diff->start ) {
break;
}
+ if ( $bookmark->start >= $diff->start && $bookmark->end < $diff->end ) {
+ $this->release_bookmark( $bookmark_name );
+ continue 2;
+ }
+
$delta = strlen( $diff->text ) - ( $diff->end - $diff->start );
- if ( $update_head ) {
+ if ( $bookmark->start >= $diff->start ) {
$head_delta += $delta;
}
- if ( $update_tail ) {
+ if ( $bookmark->end >= $diff->end ) {
$tail_delta += $delta;
}
}
@@ -1467,6 +1502,18 @@ private function apply_attributes_updates() {
$this->lexical_updates = array();
}
+ /**
+ * Checks whether a bookmark with the given name exists.
+ *
+ * @since 6.3.0
+ *
+ * @param string $bookmark_name Name to identify a bookmark that potentially exists.
+ * @return bool True if a bookmark with that name is currently set, false otherwise.
+ */
+ public function has_bookmark( $bookmark_name ) {
+ return array_key_exists( $bookmark_name, $this->bookmarks );
+ }
+
/**
* Move the internal cursor in the Tag Processor to a given bookmark's location.
*
@@ -1504,7 +1551,7 @@ public function seek( $bookmark_name ) {
$this->bytes_already_parsed = $this->bookmarks[ $bookmark_name ]->start;
$this->bytes_already_copied = $this->bytes_already_parsed;
$this->output_buffer = substr( $this->html, 0, $this->bytes_already_copied );
- return $this->next_tag();
+ return $this->next_tag( array( 'tag_closers' => 'visit' ) );
}
/**
@@ -1512,8 +1559,8 @@ public function seek( $bookmark_name ) {
*
* @since 6.2.0
*
- * @param WP_HTML_Text_Replacement $a First attribute update.
- * @param WP_HTML_Text_Replacement $b Second attribute update.
+ * @param WP_HTML_Text_Replacement $a First lexical update.
+ * @param WP_HTML_Text_Replacement $b Second lexical update.
* @return int Comparison value for string order.
*/
private static function sort_start_ascending( $a, $b ) {
@@ -1549,11 +1596,11 @@ private static function sort_start_ascending( $a, $b ) {
* @return string|boolean|null Value of enqueued update if present, otherwise false.
*/
private function get_enqueued_attribute_value( $comparable_name ) {
- if ( ! isset( $this->lexical_updates[ $comparable_name ] ) ) {
+ if ( ! isset( $this->attribute_updates[ $comparable_name ] ) ) {
return false;
}
- $enqueued_text = $this->lexical_updates[ $comparable_name ]->text;
+ $enqueued_text = $this->attribute_updates[ $comparable_name ]->text;
// Removed attributes erase the entire span.
if ( '' === $enqueued_text ) {
@@ -1626,7 +1673,7 @@ public function get_attribute( $name ) {
/*
* For every attribute other than `class` it's possible to perform a quick check if
- * there's an enqueued lexical update whose value takes priority over what's found in
+ * there's an enqueued attribute update whose value takes priority over what's found in
* the input document.
*
* The `class` attribute is special though because of the exposed helpers `add_class`
@@ -1636,7 +1683,7 @@ public function get_attribute( $name ) {
* into an attribute value update.
*/
if ( 'class' === $name ) {
- $this->class_name_updates_to_attributes_updates();
+ $this->class_name_updates_to_attribute_updates();
}
// Return any enqueued attribute value updates if they exist.
@@ -1864,8 +1911,8 @@ public function set_attribute( $name, $value ) {
*
* Result:
*/
- $existing_attribute = $this->attributes[ $comparable_name ];
- $this->lexical_updates[ $name ] = new WP_HTML_Text_Replacement(
+ $existing_attribute = $this->attributes[ $comparable_name ];
+ $this->attribute_updates[ $comparable_name ] = new WP_HTML_Text_Replacement(
$existing_attribute->start,
$existing_attribute->end,
$updated_attribute
@@ -1882,7 +1929,7 @@ public function set_attribute( $name, $value ) {
*
* Result:
*/
- $this->lexical_updates[ $comparable_name ] = new WP_HTML_Text_Replacement(
+ $this->attribute_updates[ $comparable_name ] = new WP_HTML_Text_Replacement(
$this->tag_name_starts_at + $this->tag_name_length,
$this->tag_name_starts_at + $this->tag_name_length,
' ' . $updated_attribute
@@ -1940,8 +1987,8 @@ public function remove_attribute( $name ) {
* and when that attribute wasn't originally present.
*/
if ( ! isset( $this->attributes[ $name ] ) ) {
- if ( isset( $this->lexical_updates[ $name ] ) ) {
- unset( $this->lexical_updates[ $name ] );
+ if ( isset( $this->attribute_updates[ $name ] ) ) {
+ unset( $this->attribute_updates[ $name ] );
}
return false;
}
@@ -1957,7 +2004,7 @@ public function remove_attribute( $name ) {
*
* Result:
*/
- $this->lexical_updates[ $name ] = new WP_HTML_Text_Replacement(
+ $this->attribute_updates[ $name ] = new WP_HTML_Text_Replacement(
$this->attributes[ $name ]->start,
$this->attributes[ $name ]->end,
''
@@ -2026,7 +2073,10 @@ public function __toString() {
* @return string The processed HTML.
*/
public function get_updated_html() {
- $requires_no_updating = 0 === count( $this->classname_updates ) && 0 === count( $this->lexical_updates );
+ $requires_no_updating =
+ 0 === count( $this->classname_updates ) &&
+ 0 === count( $this->attribute_updates ) &&
+ 0 === count( $this->lexical_updates );
/*
* When there is nothing more to update and nothing has already been
@@ -2057,8 +2107,9 @@ public function get_updated_html() {
*
- * Note: `apply_attributes_updates()` modifies `$this->output_buffer`.
+ * Note: `apply_lexical_updates()` modifies `$this->output_buffer`.
*/
- $this->class_name_updates_to_attributes_updates();
- $this->apply_attributes_updates();
+ $this->class_name_updates_to_attribute_updates();
+ $this->attribute_updates_to_lexical_updates();
+ $this->apply_lexical_updates();
/*
* 2. Replace the original HTML with the now-updated HTML so that it's possible to
@@ -2261,4 +2312,4 @@ private function matches() {
return true;
}
-}
+}
\ No newline at end of file
diff --git a/src/wp-settings.php b/src/wp-settings.php
index a11b07ca28d07..ef5c6abc4355e 100644
--- a/src/wp-settings.php
+++ b/src/wp-settings.php
@@ -238,6 +238,7 @@
require ABSPATH . WPINC . '/html-api/class-wp-html-span.php';
require ABSPATH . WPINC . '/html-api/class-wp-html-text-replacement.php';
require ABSPATH . WPINC . '/html-api/class-wp-html-tag-processor.php';
+require ABSPATH . WPINC . '/html-api/class-wp-html-processor.php';
require ABSPATH . WPINC . '/class-wp-http.php';
require ABSPATH . WPINC . '/class-wp-http-streams.php';
require ABSPATH . WPINC . '/class-wp-http-curl.php';
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php
new file mode 100644
index 0000000000000..1f1bf02237b39
--- /dev/null
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php
@@ -0,0 +1,22 @@
+Lorem
IpsumDolorSit' );
+ // The controller's schema is hardcoded, so tests would not be meaningful.
+ $p->next_tag_in_body_insertion_mode();
+ }
+
+}
diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-bookmark.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-bookmark.php
index 04a6ae590cd7d..69a9695d1fd59 100644
--- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-bookmark.php
+++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-bookmark.php
@@ -63,6 +63,32 @@ public function test_seek() {
);
}
+ /**
+ * Ensures that seeking to a bookmark set on a tag closer lands on that closer.
+ *
+ * @ticket 56299
+ *
+ * @covers WP_HTML_Tag_Processor::seek
+ */
+ public function test_seeks_to_tag_closer_bookmark() {
+ $p = new WP_HTML_Tag_Processor( '<div>First</div><span>Second</span>' );
+ // With tag closers visited, the first match is the `<div>` opener...
+ $p->next_tag( array( 'tag_closers' => 'visit' ) );
+ $p->set_bookmark( 'first' );
+ // ...and the second match is its `</div>` closer.
+ $p->next_tag( array( 'tag_closers' => 'visit' ) );
+ $p->set_bookmark( 'second' );
+
+ $p->seek( 'first' );
+ $p->seek( 'second' );
+
+ $this->assertSame(
+ 'DIV',
+ $p->get_tag(),
+ 'Did not seek to the intended bookmark location'
+ );
+ }
+
/**
* WP_HTML_Tag_Processor used to test for the diffs affecting
* the adjusted bookmark position while simultaneously adjusting