diff --git a/.github/actions/verify-generated-files/action.yml b/.github/actions/verify-generated-files/action.yml index 13f0cf9f128b..79c49dbfcfff 100644 --- a/.github/actions/verify-generated-files/action.yml +++ b/.github/actions/verify-generated-files/action.yml @@ -13,5 +13,4 @@ runs: ext/tokenizer/tokenizer_data_gen.php build/gen_stub.php -f --generate-optimizer-info --verify ext/phar/makestub.php - # Use the -a flag for a bug in git 2.46.0, which doesn't consider changed -diff files. - git add . -N && git diff -a --exit-code + .github/scripts/test-directory-unchanged.sh . diff --git a/.github/scripts/download-bundled/pcre2.sh b/.github/scripts/download-bundled/pcre2.sh new file mode 100755 index 000000000000..b43554206c92 --- /dev/null +++ b/.github/scripts/download-bundled/pcre2.sh @@ -0,0 +1,32 @@ +#!/bin/sh +set -ex +cd "$(dirname "$0")/../../.." + +revision=refs/tags/pcre2-10.44 + +git clone --depth 1 --recurse-submodules --revision="$revision" https://github.com/PCRE2Project/pcre2.git /tmp/php-src-bundled/pcre2 + +rm -rf ext/pcre/pcre2lib +cp -R /tmp/php-src-bundled/pcre2/src ext/pcre/pcre2lib + +cd ext/pcre/pcre2lib + +# remove unneeded files +rm config.h.generic +rm pcre2.h.in +rm pcre2_dftables.c +rm pcre2_fuzzsupport.c +rm pcre2_jit_test.c +rm pcre2demo.c +rm pcre2grep.c +rm pcre2posix.c +rm pcre2posix.h +rm pcre2posix_test.c +rm pcre2test.c + +# move renamed files +mv pcre2.h.generic pcre2.h +mv pcre2_chartables.c.dist pcre2_chartables.c + +# add extra files +git restore config.h # based on config.h.generic but with many changes diff --git a/.github/scripts/test-directory-unchanged.sh b/.github/scripts/test-directory-unchanged.sh new file mode 100755 index 000000000000..0ce7fd4cc4af --- /dev/null +++ b/.github/scripts/test-directory-unchanged.sh @@ -0,0 +1,13 @@ +#!/bin/sh +set -ex + +cd "$(dirname "$0")/../../$1" + +# notify git about untracked (except ignored) files +git add -N . + +# display overview of changed files +git status . + +# display diff of working directory vs HEAD commit and set exit code +git diff -a --exit-code HEAD . diff --git a/.github/workflows/verify-bundled-files.yml b/.github/workflows/verify-bundled-files.yml new file mode 100644 index 000000000000..e15fcb36a0e7 --- /dev/null +++ b/.github/workflows/verify-bundled-files.yml @@ -0,0 +1,43 @@ +name: Verify Bundled Files + +on: + push: + paths: &paths + - '.github/scripts/download-bundled/pcre2.sh' + - 'ext/pcre/pcre2lib/**' + pull_request: + paths: *paths + schedule: + - cron: "0 1 * * *" + workflow_dispatch: ~ + +permissions: + contents: read + +jobs: + VERIFY_BUNDLED_FILES: + name: Verify Bundled Files + runs-on: ubuntu-24.04 + steps: + - name: git checkout + uses: actions/checkout@v5 + + - name: Detect changed files + uses: dorny/paths-filter@v3 + id: changes + with: + base: master + filters: | + pcre2: + - '.github/scripts/download-bundled/pcre2.sh' + - 'ext/pcre/pcre2lib/**' + + - name: PCRE2 + if: ${{ !cancelled() && (steps.changes.outputs.pcre2 == 'true' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') }} + run: | + echo "::group::Download" + .github/scripts/download-bundled/pcre2.sh + echo "::endgroup::" + echo "::group::Verify files" + .github/scripts/test-directory-unchanged.sh ext/pcre/pcre2lib + echo "::endgroup::" diff --git a/ext/dom/lexbor/selectors-adapted/selectors.c b/ext/dom/lexbor/selectors-adapted/selectors.c index b547a3a586b2..0bb285b39e99 100644 --- a/ext/dom/lexbor/selectors-adapted/selectors.c +++ b/ext/dom/lexbor/selectors-adapted/selectors.c @@ -35,7 +35,22 @@ static void dom_lxb_str_wrapper_release(dom_lxb_str_wrapper *wrapper) } } -static zend_always_inline bool lxb_selectors_adapted_cmp_local_name_literal(const xmlNode *node, const char *name) +static bool lxb_selectors_str_cmp_loright(const char *lhs, const char *rhs) +{ + while (true) { + if (*rhs != zend_tolower_ascii(*lhs)) { + return false; + } + if (!*lhs) { + return true; + } + ++rhs; + ++lhs; + } +} + +/* `name` is lowercase */ +static zend_always_inline bool lxb_selectors_cmp_html_name_lit(const xmlNode *node, const char *name) { return strcmp((const char *) node->name, name) == 0; } @@ -48,14 +63,15 @@ static zend_always_inline bool lxb_selectors_adapted_cmp_ns(const xmlNode *a, co static zend_always_inline bool lxb_selectors_adapted_cmp_local_name_id(const xmlNode *node, const lxb_selectors_adapted_id *id) { - uintptr_t ptr = (uintptr_t) node->name; - if (id->interned && (ptr & (ZEND_MM_ALIGNMENT - 1)) != 0) { - /* It cannot be a heap-allocated string because the pointer is not properly aligned for a heap allocation. - * Therefore, it must be interned into the dictionary pool. */ - return node->name == id->name; + ZEND_ASSERT(node->doc != NULL); + if (php_dom_ns_is_html_and_document_is_html(node)) { + /* From https://html.spec.whatwg.org/#case-sensitivity-of-selectors: + * The element name must be compared case sensitively _after_ converting the selector to lowercase. + * E.g. selector "DIV" must match element "div" but not "Div". */ + return lxb_selectors_str_cmp_loright((const char *) id->name, (const char *) node->name); + } else { + return strcmp((const char *) node->name, (const char *) id->name) == 0; } - - return strcmp((const char *) node->name, (const char *) id->name) == 0; } static zend_always_inline const xmlAttr *lxb_selectors_adapted_attr(const xmlNode *node, const lxb_char_t *name) @@ -64,9 +80,8 @@ static zend_always_inline const xmlAttr *lxb_selectors_adapted_attr(const xmlNod ZEND_ASSERT(node->doc != NULL); if (php_dom_ns_is_html_and_document_is_html(node)) { /* No need to handle DTD entities as we're in HTML. */ - size_t name_bound = strlen((const char *) name) + 1; for (const xmlAttr *cur = node->properties; cur != NULL; cur = cur->next) { - if (lexbor_str_data_nlocmp_right(cur->name, name, name_bound)) { + if (lxb_selectors_str_cmp_loright((const char *) name, (const char *) cur->name)) { attr = cur; break; } @@ -154,18 +169,7 @@ static bool lxb_selectors_is_lowercased_html_attrib_name(const lxb_css_selector_ static void lxb_selectors_adapted_set_entry_id_ex(lxb_selectors_entry_t *entry, const lxb_css_selector_t *selector, const xmlNode *node) { entry->id.attr_case_insensitive = lxb_selectors_is_lowercased_html_attrib_name(selector); - - if (node->doc != NULL && node->doc->dict != NULL) { - const xmlChar *interned = xmlDictExists(node->doc->dict, selector->name.data, selector->name.length); - if (interned != NULL) { - entry->id.name = interned; - entry->id.interned = true; - return; - } - } - entry->id.name = selector->name.data; - entry->id.interned = false; } static zend_always_inline void lxb_selectors_adapted_set_entry_id(lxb_selectors_entry_t *entry, const lxb_css_selector_t *selector, const xmlNode *node) @@ -1686,8 +1690,8 @@ lxb_selectors_pseudo_class(const lxb_css_selector_t *selector, case LXB_CSS_SELECTOR_PSEUDO_CLASS_ANY_LINK: /* https://drafts.csswg.org/selectors/#the-any-link-pseudo */ if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token) - && (lxb_selectors_adapted_cmp_local_name_literal(node, "a") - || lxb_selectors_adapted_cmp_local_name_literal(node, "area"))) + && (lxb_selectors_cmp_html_name_lit(node, "a") + || lxb_selectors_cmp_html_name_lit(node, "area"))) { return lxb_selectors_adapted_has_attr(node, "href"); } @@ -1705,7 +1709,7 @@ lxb_selectors_pseudo_class(const lxb_css_selector_t *selector, if (!php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)) { return false; } - if (lxb_selectors_adapted_cmp_local_name_literal(node, "input")) { + if (lxb_selectors_cmp_html_name_lit(node, "input")) { const xmlAttr *dom_attr = lxb_selectors_adapted_attr(node, (const lxb_char_t *) "type"); if (dom_attr == NULL) { return false; @@ -1729,7 +1733,7 @@ lxb_selectors_pseudo_class(const lxb_css_selector_t *selector, return res; } - else if(lxb_selectors_adapted_cmp_local_name_literal(node, "option")) { + else if(lxb_selectors_cmp_html_name_lit(node, "option")) { return lxb_selectors_adapted_has_attr(node, "selected"); } @@ -1802,8 +1806,8 @@ lxb_selectors_pseudo_class(const lxb_css_selector_t *selector, case LXB_CSS_SELECTOR_PSEUDO_CLASS_LINK: /* https://html.spec.whatwg.org/multipage/semantics-other.html#selector-link */ if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token) - && (lxb_selectors_adapted_cmp_local_name_literal(node, "a") - || lxb_selectors_adapted_cmp_local_name_literal(node, "area"))) + && (lxb_selectors_cmp_html_name_lit(node, "a") + || lxb_selectors_cmp_html_name_lit(node, "area"))) { return lxb_selectors_adapted_has_attr(node, "href"); } @@ -1823,9 +1827,9 @@ lxb_selectors_pseudo_class(const lxb_css_selector_t *selector, case LXB_CSS_SELECTOR_PSEUDO_CLASS_OPTIONAL: if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token) - && (lxb_selectors_adapted_cmp_local_name_literal(node, "input") - || lxb_selectors_adapted_cmp_local_name_literal(node, "select") - || lxb_selectors_adapted_cmp_local_name_literal(node, "textarea"))) + && (lxb_selectors_cmp_html_name_lit(node, "input") + || lxb_selectors_cmp_html_name_lit(node, "select") + || lxb_selectors_cmp_html_name_lit(node, "textarea"))) { return !lxb_selectors_adapted_has_attr(node, "required"); } @@ -1840,8 +1844,8 @@ lxb_selectors_pseudo_class(const lxb_css_selector_t *selector, case LXB_CSS_SELECTOR_PSEUDO_CLASS_PLACEHOLDER_SHOWN: if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token) - && (lxb_selectors_adapted_cmp_local_name_literal(node, "input") - || lxb_selectors_adapted_cmp_local_name_literal(node, "textarea"))) + && (lxb_selectors_cmp_html_name_lit(node, "input") + || lxb_selectors_cmp_html_name_lit(node, "textarea"))) { return lxb_selectors_adapted_has_attr(node, "placeholder"); } @@ -1856,9 +1860,9 @@ lxb_selectors_pseudo_class(const lxb_css_selector_t *selector, case LXB_CSS_SELECTOR_PSEUDO_CLASS_REQUIRED: if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token) - && (lxb_selectors_adapted_cmp_local_name_literal(node, "input") - || lxb_selectors_adapted_cmp_local_name_literal(node, "select") - || lxb_selectors_adapted_cmp_local_name_literal(node, "textarea"))) + && (lxb_selectors_cmp_html_name_lit(node, "input") + || lxb_selectors_cmp_html_name_lit(node, "select") + || lxb_selectors_cmp_html_name_lit(node, "textarea"))) { return lxb_selectors_adapted_has_attr(node, "required"); } @@ -2104,24 +2108,24 @@ lxb_selectors_pseudo_class_disabled(const xmlNode *node) } if (lxb_selectors_adapted_has_attr(node, "disabled") - && (lxb_selectors_adapted_cmp_local_name_literal(node, "button") - || lxb_selectors_adapted_cmp_local_name_literal(node, "input") - || lxb_selectors_adapted_cmp_local_name_literal(node, "select") - || lxb_selectors_adapted_cmp_local_name_literal(node, "textarea") - || lxb_selectors_adapted_cmp_local_name_literal(node, "optgroup") - || lxb_selectors_adapted_cmp_local_name_literal(node, "fieldset"))) + && (lxb_selectors_cmp_html_name_lit(node, "button") + || lxb_selectors_cmp_html_name_lit(node, "input") + || lxb_selectors_cmp_html_name_lit(node, "select") + || lxb_selectors_cmp_html_name_lit(node, "textarea") + || lxb_selectors_cmp_html_name_lit(node, "optgroup") + || lxb_selectors_cmp_html_name_lit(node, "fieldset"))) { return true; } - if (lxb_selectors_adapted_cmp_local_name_literal(node, "fieldset")) { + if (lxb_selectors_cmp_html_name_lit(node, "fieldset")) { const xmlNode *fieldset = node; node = node->parent; while (node != NULL && CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) { /* node is a disabled fieldset that is an ancestor of fieldset */ if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token) - && lxb_selectors_adapted_cmp_local_name_literal(node, "fieldset") + && lxb_selectors_cmp_html_name_lit(node, "fieldset") && lxb_selectors_adapted_has_attr(node, "disabled")) { /* Search first legend child and figure out if fieldset is a descendent from that. */ @@ -2129,7 +2133,7 @@ lxb_selectors_pseudo_class_disabled(const xmlNode *node) do { if (search_current->type == XML_ELEMENT_NODE && php_dom_ns_is_fast(search_current, php_dom_ns_is_html_magic_token) - && lxb_selectors_adapted_cmp_local_name_literal(search_current, "legend")) { + && lxb_selectors_cmp_html_name_lit(search_current, "legend")) { /* search_current is a legend element. */ const xmlNode *inner_search_current = fieldset; @@ -2235,8 +2239,8 @@ static bool lxb_selectors_pseudo_class_read_write(const xmlNode *node) { if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)) { - if (lxb_selectors_adapted_cmp_local_name_literal(node, "input") - || lxb_selectors_adapted_cmp_local_name_literal(node, "textarea")) { + if (lxb_selectors_cmp_html_name_lit(node, "input") + || lxb_selectors_cmp_html_name_lit(node, "textarea")) { return !lxb_selectors_adapted_has_attr(node, "readonly") && !lxb_selectors_adapted_has_attr(node, "disabled"); } else { const xmlAttr *attr = lxb_selectors_adapted_attr(node, (const lxb_char_t *) "contenteditable"); diff --git a/ext/dom/lexbor/selectors-adapted/selectors.h b/ext/dom/lexbor/selectors-adapted/selectors.h index c0f76cce3d5c..b64a9e49ee26 100644 --- a/ext/dom/lexbor/selectors-adapted/selectors.h +++ b/ext/dom/lexbor/selectors-adapted/selectors.h @@ -77,7 +77,6 @@ typedef lxb_selectors_entry_t * typedef struct { const xmlChar *name; - bool interned; bool attr_case_insensitive; } lxb_selectors_adapted_id; diff --git a/ext/dom/tests/modern/css_selectors/gh20395.phpt b/ext/dom/tests/modern/css_selectors/gh20395.phpt new file mode 100644 index 000000000000..af04cb1c27a4 --- /dev/null +++ b/ext/dom/tests/modern/css_selectors/gh20395.phpt @@ -0,0 +1,33 @@ +--TEST-- +GH-20395 (\Dom\ParentNode::querySelector and \Dom\ParentNode::querySelectorAll requires elements in $selectors to be lowercase) +--EXTENSIONS-- +dom +--CREDITS-- +DeveloperRob +--FILE-- +'; +$dom = Dom\HtmlDocument::createFromString($html); +var_dump(is_null($dom->querySelector('html'))); +var_dump(is_null($dom->querySelector('Html'))); +var_dump(is_null($dom->querySelector('HTML'))); + +$dom->body->appendChild($dom->createElement('div')); +$dom->body->appendChild($dom->createElementNS('http://www.w3.org/1999/xhtml', 'Div')); + +foreach ($dom->querySelectorAll('div') as $div) { + var_dump($div->localName); +} + +foreach ($dom->querySelectorAll('Div') as $div) { + var_dump($div->localName); +} + +?> +--EXPECT-- +bool(false) +bool(false) +bool(false) +string(3) "div" +string(3) "div" diff --git a/ext/dom/tests/modern/css_selectors/pseudo_classes_links.phpt b/ext/dom/tests/modern/css_selectors/pseudo_classes_links.phpt index 8a688286b680..7afcb6e3cfbd 100644 --- a/ext/dom/tests/modern/css_selectors/pseudo_classes_links.phpt +++ b/ext/dom/tests/modern/css_selectors/pseudo_classes_links.phpt @@ -11,6 +11,7 @@ $dom = DOM\XMLDocument::createFromString(<< Link Link + Not actually a link Link XML); @@ -18,6 +19,7 @@ XML); test_helper($dom, ':any-link'); test_helper($dom, ':link'); test_helper($dom, 'a:not(:any-link)'); +test_helper($dom, ':not(:any-link)'); ?> --EXPECT-- @@ -29,3 +31,12 @@ test_helper($dom, 'a:not(:any-link)'); Link --- Selector: a:not(:any-link) --- Link +--- Selector: :not(:any-link) --- + + Link + Link + Not actually a link + Link + +Link +Not actually a link diff --git a/ext/pcre/pcre2lib/pcre2_chartables.c b/ext/pcre/pcre2lib/pcre2_chartables.c index 861914d1ac3a..7362c3f2345a 100644 --- a/ext/pcre/pcre2lib/pcre2_chartables.c +++ b/ext/pcre/pcre2lib/pcre2_chartables.c @@ -5,7 +5,8 @@ /* This file was automatically written by the pcre2_dftables auxiliary program. It contains character tables that are used when no external tables are passed to PCRE2 by the application that calls it. The tables -are used only for characters whose code values are less than 256. */ +are used only for characters whose code values are less than 256, and +only relevant if not in UCP mode. */ /* This set of tables was written in the C locale. */ @@ -18,13 +19,6 @@ PCRE2 is configured with --enable-rebuild-chartables. However, you can run pcre2_dftables manually with the -L option to build tables using the LC_ALL locale. */ -/* The following #include is present because without it gcc 4.x may remove -the array definition from the final binary if PCRE2 is built into a static -library and dead code stripping is activated. This leads to link errors. -Pulling in the header ensures that the array gets flagged as "someone -outside this compilation unit might reference this" and so it will always -be supplied to the linker. */ - #ifdef HAVE_CONFIG_H #include "config.h" #endif @@ -163,7 +157,7 @@ graph, print, punct, and cntrl. Other classes are built from combinations. */ 0x02 letter 0x04 lower case letter 0x08 decimal digit - 0x10 alphanumeric or '_' + 0x10 word (alphanumeric or '_') */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ diff --git a/ext/session/config.w32 b/ext/session/config.w32 index 825bc8b61d29..40f9f78a313e 100644 --- a/ext/session/config.w32 +++ b/ext/session/config.w32 @@ -3,10 +3,10 @@ ARG_ENABLE("session", "session support", "yes"); if (PHP_SESSION == "yes") { - EXTENSION("session", "mod_user_class.c session.c mod_files.c mod_mm.c mod_user.c", false /* never shared */, "/DZEND_ENABLE_STATIC_TSRMLS_CACHE=1"); + EXTENSION("session", "mod_user_class.c session.c mod_files.c mod_user.c", false /* never shared */, "/DZEND_ENABLE_STATIC_TSRMLS_CACHE=1"); ADD_EXTENSION_DEP('session', 'date'); // https://bugs.php.net/53141 ADD_EXTENSION_DEP('session', 'spl', true); AC_DEFINE("HAVE_PHP_SESSION", 1, "Define to 1 if the PHP extension 'session' is available."); - PHP_INSTALL_HEADERS("ext/session", "mod_mm.h php_session.h mod_files.h mod_user.h"); + PHP_INSTALL_HEADERS("ext/session", "php_session.h mod_files.h mod_user.h"); } diff --git a/ext/standard/array.c b/ext/standard/array.c index 0389eb1840a9..7bedbdfe59fd 100644 --- a/ext/standard/array.c +++ b/ext/standard/array.c @@ -7037,8 +7037,7 @@ PHP_FUNCTION(array_chunk) if (size > num_in) { if (num_in == 0) { - RETVAL_EMPTY_ARRAY(); - return; + RETURN_EMPTY_ARRAY(); } size = num_in; } @@ -7046,12 +7045,11 @@ PHP_FUNCTION(array_chunk) array_init_size(return_value, (uint32_t)(((num_in - 1) / size) + 1)); zend_hash_real_init_packed(Z_ARRVAL_P(return_value)); - ZVAL_UNDEF(&chunk); - ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, str_key, entry) { /* If new chunk, create and initialize it. */ - if (Z_TYPE(chunk) == IS_UNDEF) { + if (current == 0) { array_init_size(&chunk, (uint32_t)size); + add_next_index_zval(return_value, &chunk); } /* Add entry to the chunk, preserving keys if necessary. */ @@ -7066,19 +7064,10 @@ PHP_FUNCTION(array_chunk) } zval_add_ref(entry); - /* If reached the chunk size, add it to the result array, and reset the - * pointer. */ if (++current == size) { - add_next_index_zval(return_value, &chunk); - ZVAL_UNDEF(&chunk); current = 0; } } ZEND_HASH_FOREACH_END(); - - /* Add the final chunk if there is one. */ - if (Z_TYPE(chunk) != IS_UNDEF) { - add_next_index_zval(return_value, &chunk); - } } /* }}} */