diff --git a/.idea/encodings.xml b/.idea/encodings.xml index ea72052b66..f8989491c4 100644 --- a/.idea/encodings.xml +++ b/.idea/encodings.xml @@ -1,9 +1,11 @@ - + + + diff --git a/runtime-light/stdlib/string/regex-functions.cpp b/runtime-light/stdlib/string/regex-functions.cpp index 0aba6c839d..7bbe643afe 100644 --- a/runtime-light/stdlib/string/regex-functions.cpp +++ b/runtime-light/stdlib/string/regex-functions.cpp @@ -6,6 +6,8 @@ #include #include +#include +#include #include #include #include @@ -33,6 +35,7 @@ constexpr size_t ERROR_BUFFER_LENGTH = 256; enum class trailing_unmatch : uint8_t { skip, include }; +using backref = std::string_view; using regex_pcre2_group_names_t = kphp::stl::vector; struct RegexInfo final { @@ -102,6 +105,139 @@ int64_t skip_utf8_subsequent_bytes(int64_t offset, const std::string_view subjec return offset; } +std::optional try_get_backref(std::string_view preg_replacement) noexcept { + if (preg_replacement.empty() || !std::isdigit(preg_replacement[0])) { + return std::nullopt; + } + + if (preg_replacement.size() == 1 || !std::isdigit(preg_replacement[1])) { + return backref{preg_replacement.substr(0, 1)}; + } + + return backref{preg_replacement.substr(0, 2)}; +} + +using replacement_term = std::variant; + +class preg_replacement_parser { + std::string_view preg_replacement; + + replacement_term parse_term_internal() noexcept { + kphp::log::assertion(!preg_replacement.empty()); + auto first_char{preg_replacement.front()}; + preg_replacement = preg_replacement.substr(1); + if (preg_replacement.empty()) { + return first_char; + } + switch (first_char) { + case '$': + // $1, ${1} + if (preg_replacement.front() == '{') { + return try_get_backref(preg_replacement.substr(1)) + .and_then([this](auto value) noexcept -> std::optional { + auto digits_end_pos = 1 + value.size(); + if (digits_end_pos < preg_replacement.size() && preg_replacement[digits_end_pos] == '}') { + preg_replacement = preg_replacement.substr(1 + value.size() + 1); + return value; + } + + return std::nullopt; + }) + .value_or('$'); + } + + return try_get_backref(preg_replacement) + .transform([this](auto value) noexcept -> replacement_term { + auto digits_end_pos = value.size(); + preg_replacement = preg_replacement.substr(digits_end_pos); + return value; + }) + .value_or('$'); + + case '\\': { + // \1 + auto back_reference_opt{try_get_backref(preg_replacement).transform([this](auto value) noexcept -> replacement_term { + auto digits_end_pos = value.size(); + preg_replacement = preg_replacement.substr(digits_end_pos); + return value; + })}; + if (back_reference_opt.has_value()) { + return *back_reference_opt; + } else { + auto res{preg_replacement.front()}; + if (res == '$' || res == '\\') { + preg_replacement = preg_replacement.substr(1); + return res; + } + return '\\'; + } + } + default: + return first_char; + } + } + +public: + explicit preg_replacement_parser(std::string_view preg_replacement) noexcept + : preg_replacement{preg_replacement} {} + + struct iterator { + preg_replacement_parser* parser{nullptr}; + replacement_term current_term{'\0'}; + + using difference_type = std::ptrdiff_t; + using value_type = replacement_term; + using reference = const replacement_term&; + using pointer = const replacement_term*; + using iterator_category = std::input_iterator_tag; + + iterator() noexcept = default; + explicit iterator(preg_replacement_parser* p) noexcept + : parser{p} { + if (parser->preg_replacement.empty()) { + parser = nullptr; + } else { + current_term = parser->parse_term_internal(); + } + } + + reference operator*() const noexcept { + return current_term; + } + pointer operator->() const noexcept { + return std::addressof(current_term); + } + + iterator& operator++() noexcept { + if (!parser->preg_replacement.empty()) { + current_term = parser->parse_term_internal(); + } else { + parser = nullptr; + } + return *this; + } + iterator operator++(int) noexcept { + iterator temp = *this; + ++(*this); + return temp; + } + + friend bool operator==(const iterator& a, const iterator& b) noexcept { + return a.parser == b.parser; + } + friend bool operator!=(const iterator& a, const iterator& b) noexcept { + return !(a == b); + } + }; + + iterator begin() noexcept { + return iterator{this}; + } + iterator end() noexcept { + return iterator{}; + } +}; + bool parse_regex(RegexInfo& regex_info) noexcept { if (regex_info.regex.empty()) { kphp::log::warning("empty regex"); @@ -591,20 +727,23 @@ Optional f$preg_replace(const string& pattern, const string& replacement return {}; } - string pcre2_replacement{replacement}; - { // we need to replace PHP's back references with PCRE2 ones - static constexpr std::string_view backreference_pattern = R"(/\\(\d)/)"; - static constexpr std::string_view backreference_replacement = "$$$1"; - - RegexInfo regex_info{backreference_pattern, {replacement.c_str(), replacement.size()}, backreference_replacement}; - bool success{parse_regex(regex_info)}; - success &= compile_regex(regex_info); - success &= replace_regex(regex_info, std::numeric_limits::max()); - if (!success) [[unlikely]] { - kphp::log::warning("can't replace PHP back references with PCRE2 ones"); - return {}; + // we need to replace PHP's back references with PCRE2 ones + auto parser{preg_replacement_parser{{replacement.c_str(), replacement.size()}}}; + kphp::stl::string pcre2_replacement{}; + for (const auto& term : parser) { + if (std::holds_alternative(term)) { + auto c{std::get(term)}; + pcre2_replacement.push_back(c); + if (c == '$') { + pcre2_replacement.push_back('$'); + } + } else { + auto backreference{std::get(term)}; + pcre2_replacement.reserve(pcre2_replacement.size() + backreference.size() + 3); + pcre2_replacement.append("${"); + pcre2_replacement.append(backreference); + pcre2_replacement.append("}"); } - pcre2_replacement = regex_info.opt_replace_result.has_value() ? *std::move(regex_info.opt_replace_result) : replacement; } RegexInfo regex_info{{pattern.c_str(), pattern.size()}, {subject.c_str(), subject.size()}, {pcre2_replacement.c_str(), pcre2_replacement.size()}}; diff --git a/tests/phpt/dl/002_preg_replace_callback.php b/tests/phpt/dl/002_preg_replace_callback.php new file mode 100644 index 0000000000..16c97a8946 --- /dev/null +++ b/tests/phpt/dl/002_preg_replace_callback.php @@ -0,0 +1,82 @@ +@ok callback benchmark k2_skip +'.$input[1].''; + } + + + $res = preg_replace_callback($regex, 'parseTagsRecursive', $input, -1, $count); + var_dump ($count); + return (string)$res; + +} + +$output = parseTagsRecursive($input); + +echo $output, "\n"; + + +/** + * @kphp-required + * @param string[] $x + * @return string + */ +function g($x) { + return "'{$x[0]}'"; +} + +var_dump(preg_replace_callback('@\b\w{1,2}\b@', 'g', array('a b3 bcd', 'v' => 'aksfjk', 12 => 'aa bb'))); + +@var_dump(preg_replace_callback('~\A.~', 'g', array(array('xyz')))); + +/** + * @kphp-required + * @param string[] $m + * @return string + */ +function tmp($m) { + return strtolower($m[0]); +} + +var_dump(preg_replace_callback('~\A.~', 'tmp', 'ABC')); + +var_dump(preg_replace_callback("/(ab)(cd)(e)/", "cb", 'abcde')); diff --git a/tests/phpt/dl/003_preg_match_all.php b/tests/phpt/dl/003_preg_match_all.php new file mode 100644 index 0000000000..36271182ad --- /dev/null +++ b/tests/phpt/dl/003_preg_match_all.php @@ -0,0 +1,213 @@ +@ok callback benchmark k2_skip +~', 'This is no more', $v)); var_dump ($v); +var_dump (preg_match_all ('~.*?~', 'This', $v)); var_dump ($v); +var_dump (preg_match_all ('~.*~', 'This', $v)); var_dump ($v); + +var_dump (preg_match_all ('~<.*?>~', 'This is no more', $v)); var_dump ($v); + +var_dump (preg_match_all ('~(\d+|)~', '12|34|567|', $v)); var_dump ($v); +var_dump (preg_match_all ('~(\d+\|)~', '12|34|567|', $v)); var_dump ($v); +var_dump (preg_match_all ('~((\d)+\|)~', '12|34|567|', $v)); var_dump ($v); +var_dump (preg_match_all ('~((\d)+\|)+~', '12|34|567|', $v)); var_dump ($v); +var_dump (preg_match_all ('~((\d+)\|)+~', '12|34|567|', $v)); var_dump ($v); + + +$html = "bold textclick me"; + +preg_match_all("/(<([\w]+)[^>]*>)(.*?)(<\/\\2>)/", $html, $matches, PREG_SET_ORDER); + +foreach ($matches as $val) { + echo "matched: " . $val[0] . "\n"; + echo "part 1: " . $val[1] . "\n"; + echo "part 2: " . $val[2] . "\n"; + echo "part 3: " . $val[3] . "\n"; + echo "part 4: " . $val[4] . "\n\n"; +} + +preg_match_all("/\(? (\d{3})? \)? (?(1) [\-\s] ) \d{3}-\d{4}/x", + "Call 555-1212 or 1-800-555-1212", $phones); + +$str = <<\w+): (?P\d+)/', $str, $matches); + +print_r($matches); + + +$str0 = <<1)?(?P[a-c]+):() (?P\d+)(?Pa)?)', '=A=i', "/(a)?/") as $pattern) { + foreach (array('((1)?([a-c]+):() (\d+)(a)?)', '=A=i', "/(a)?/") as $pattern) { + foreach (array($str0, '', "a", "1abAcaba", "dad") as $str) { + preg_match_all ($pattern, $str, $matches, PREG_PATTERN_ORDER | PREG_OFFSET_CAPTURE); + if ($i == 0) { + var_dump ("preg_match_all($pattern, $str, PREG_PATTERN_ORDER | PREG_OFFSET_CAPTURE)"); + var_dump ($matches); + } + + preg_match_all ($pattern, $str, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE); + if ($i == 0) { + var_dump ("preg_match_all ($pattern, $str, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)"); + var_dump ($matches); + } + + preg_match_all ($pattern, $str, $matches, PREG_PATTERN_ORDER); + if ($i == 0) { + var_dump ("preg_match_all ($pattern, $str, PREG_PATTERN_ORDER)"); + var_dump ($matches); + } + + preg_match_all ($pattern, $str, $matches, PREG_SET_ORDER); + if ($i == 0) { + var_dump ("preg_match_all ($pattern, $str, PREG_SET_ORDER)"); + var_dump ($matches); + } + + preg_match ($pattern, $str, $matches, PREG_OFFSET_CAPTURE); + if ($i == 0) { + var_dump ("preg_match ($pattern, $str, PREG_OFFSET_CAPTURE)"); + var_dump ($matches); + } + + preg_match ($pattern, $str, $matches); + if ($i == 0) { + var_dump ("preg_match ($pattern, $str)"); + var_dump ($matches); + } + } + } +} + + +foreach (array(PREG_PATTERN_ORDER, PREG_SET_ORDER) as $flag) { + var_dump(preg_match_all('~ + (?P + (?P(\d{2})?\d\d) - + (?P(?:\d\d|[a-zA-Z]{2,3})) - + (?P[0-3]?\d)) + ~x', + '2006-05-13 e outra data: "12-Aug-37"', $m, $flag)); + + var_dump($m); +} + + +var_dump(preg_match_all('/((?:(?:unsigned|struct)\s+)?\w+)(?:\s*(\*+)\s+|\s+(\**))(\w+(?:\[\s*\w*\s*\])?)\s*(?:(=)[^,;]+)?((?:\s*,\s*\**\s*\w+(?:\[\s*\w*\s*\])?\s*(?:=[^,;]+)?)*)\s*;/', 'unsigned int xpto = 124; short a, b;', $m, PREG_SET_ORDER)); +var_dump($m); + +var_dump(preg_match_all('/(?:\([^)]+\))?(&?)([\w>.()-]+(?:\[\w+\])?)\s*,?((?:\)*\s*=)?)/', '&a, b, &c', $m, PREG_SET_ORDER)); +var_dump($m); + +var_dump(preg_match_all('/zend_parse_parameters(?:_ex\s*\([^,]+,[^,]+|\s*\([^,]+),\s*"([^"]*)"\s*,\s*([^{;]*)/', 'zend_parse_parameters( 0, "addd|s/", a, b, &c);', $m, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)); +var_dump($m); + + +$sampledata = " +/p2/var/php_gcov/PHP_4_4/ext/ming/ming.c: In function `zif_swfbitmap_init': +/p2/var/php_gcov/PHP_4_4/ext/ming/ming.c:323: warning: assignment from incompatible pointer type +/p2/var/php_gcov/PHP_4_4/ext/ming/ming.c: In function `zif_swftextfield_setFont': +/p2/var/php_gcov/PHP_4_4/ext/ming/ming.c:2597: warning: passing arg 2 of `SWFTextField_setFont' from incompatible pointer type +/p2/var/php_gcov/PHP_4_4/ext/oci8/oci8.c:1027: warning: `oci_ping' defined but not used +/p2/var/php_gcov/PHP_4_4/ext/posix/posix.c: In function `zif_posix_getpgid': +/p2/var/php_gcov/PHP_4_4/ext/posix/posix.c:484: warning: implicit declaration of function `getpgid' +/p2/var/php_gcov/PHP_4_4/ext/posix/posix.c: In function `zif_posix_getsid': +/p2/var/php_gcov/PHP_4_4/ext/posix/posix.c:506: warning: implicit declaration of function `getsid' +/p2/var/php_gcov/PHP_4_4/ext/session/mod_files.c: In function `ps_read_files': +/p2/var/php_gcov/PHP_4_4/ext/session/mod_files.c:302: warning: implicit declaration of function `pread' +/p2/var/php_gcov/PHP_4_4/ext/session/mod_files.c: In function `ps_write_files': +/p2/var/php_gcov/PHP_4_4/ext/session/mod_files.c:340: warning: implicit declaration of function `pwrite' +/p2/var/php_gcov/PHP_4_4/ext/sockets/sockets.c: In function `zif_socket_get_option': +/p2/var/php_gcov/PHP_4_4/ext/sockets/sockets.c:1862: warning: unused variable `timeout' +/p2/var/php_gcov/PHP_4_4/ext/sockets/sockets.c: In function `zif_socket_set_option': +/p2/var/php_gcov/PHP_4_4/ext/sockets/sockets.c:1941: warning: unused variable `timeout' +/p2/var/php_gcov/PHP_4_4/regex/regexec.c:19: warning: `nope' defined but not used +/p2/var/php_gcov/PHP_4_4/ext/standard/exec.c:50: warning: `php_make_safe_mode_command' defined but not used +/p2/var/php_gcov/PHP_4_4/ext/standard/image.c: In function `php_handle_jpc': +/p2/var/php_gcov/PHP_4_4/ext/standard/image.c:604: warning: unused variable `dummy_int' +/p2/var/php_gcov/PHP_4_4/ext/standard/parsedate.c: In function `php_gd_parse': +/p2/var/php_gcov/PHP_4_4/ext/standard/parsedate.c:1138: warning: implicit declaration of function `php_gd_lex' +/p2/var/php_gcov/PHP_4_4/ext/standard/parsedate.y: At top level: +/p2/var/php_gcov/PHP_4_4/ext/standard/parsedate.y:864: warning: return type defaults to `int' +/p2/var/php_gcov/PHP_4_4/ext/sysvmsg/sysvmsg.c: In function `zif_msg_receive': +/p2/var/php_gcov/PHP_4_4/ext/sysvmsg/sysvmsg.c:318: warning: passing arg 2 of `php_var_unserialize' from incompatible pointer type +/p2/var/php_gcov/PHP_4_4/ext/yp/yp.c: In function `zif_yp_err_string': +/p2/var/php_gcov/PHP_4_4/ext/yp/yp.c:372: warning: assignment discards qualifiers from pointer target type +Zend/zend_language_scanner.c:5944: warning: `yy_fatal_error' defined but not used +Zend/zend_language_scanner.c:2627: warning: `yy_last_accepting_state' defined but not used +Zend/zend_language_scanner.c:2628: warning: `yy_last_accepting_cpos' defined but not used +Zend/zend_language_scanner.c:2634: warning: `yy_more_flag' defined but not used +Zend/zend_language_scanner.c:2635: warning: `yy_more_len' defined but not used +Zend/zend_language_scanner.c:5483: warning: `yyunput' defined but not used +Zend/zend_language_scanner.c:5929: warning: `yy_top_state' defined but not used +conflicts: 2 shift/reduce +Zend/zend_ini_scanner.c:457: warning: `yy_last_accepting_state' defined but not used +Zend/zend_ini_scanner.c:458: warning: `yy_last_accepting_cpos' defined but not used +Zend/zend_ini_scanner.c:1361: warning: `yyunput' defined but not used +/p2/var/php_gcov/PHP_4_4/Zend/zend_alloc.c: In function `_safe_emalloc': +/p2/var/php_gcov/PHP_4_4/Zend/zend_alloc.c:237: warning: long int format, size_t arg (arg 3) +/p2/var/php_gcov/PHP_4_4/Zend/zend_alloc.c:237: warning: long int format, size_t arg (arg 4) +/p2/var/php_gcov/PHP_4_4/Zend/zend_alloc.c:237: warning: long int format, size_t arg (arg 5) +/p2/var/php_gcov/PHP_4_4/Zend/zend_ini.c:338: warning: `zend_ini_displayer_cb' defined but not used +ext/mysql/libmysql/my_tempnam.o(.text+0x80): In function `my_tempnam': +/p2/var/php_gcov/PHP_4_4/ext/mysql/libmysql/my_tempnam.c:115: warning: the use of `tempnam' is dangerous, better use `mkstemp' +ext/mysql/libmysql/my_tempnam.o(.text+0x80): In function `my_tempnam': +/p2/var/php_gcov/PHP_4_4/ext/mysql/libmysql/my_tempnam.c:115: warning: the use of `tempnam' is dangerous, better use `mkstemp' +ext/ming/ming.o(.text+0xc115): In function `zim_swfmovie_namedAnchor': +/p2/var/php_gcov/PHP_5_2/ext/ming/ming.c:2207: undefined reference to `SWFMovie_namedAnchor' +/p2/var/php_gcov/PHP_5_2/ext/ming/ming.c:2209: undefined reference to `SWFMovie_xpto' +/p2/var/php_gcov/PHP_5_2/ext/ming/ming.c:2259: undefined reference to `SWFMovie_foo' +ext/ming/ming.o(.text+0x851): In function `zif_ming_setSWFCompression': +/p2/var/php_gcov/PHP_5_2/ext/ming/ming.c:154: undefined reference to `Ming_setSWFCompression' +"; + +$gcc_regex = '/^((.+)(\(\.text\+0x[[:xdigit:]]+\))?: In function [`\'](\w+)\':\s+)?'. + '((?(1)(?(3)[^:\n]+|\2)|[^:\n]+)):(\d+): (?:(error|warning):\s+)?(.+)'. + str_repeat('(?:\s+\5:(\d+): (?:(error|warning):\s+)?(.+))?', 99). // capture up to 100 errors + '/m'; + + +var_dump(preg_match_all($gcc_regex, $sampledata, $m, PREG_SET_ORDER)); +print_r($m); + + +var_dump(preg_match_all('|(\w+)://([^\s"<]*[\w+#?/&=])|', "This is a text string", $matches, PREG_SET_ORDER)); +var_dump($matches); + +/** + * @return mixed + */ +function func1(){ + $string = 'what the word and the other word the'; + preg_match_all('/(?Pthe)/', $string, $matches); + return $matches['word']; +} +$words = func1(); +var_dump($words); + + +$pattern = +"/\s([\w_\.\/]+)(?:=([\'\"]?(?:[\w\d\s\?=\(\)\.,'_#\/\\:;&-]|(?:\\\\\"|\\\')?)+[\'\"]?))?/"; +$context = ""; + +$match = array(); + +if ($result = preg_match_all($pattern, $context, $match)) +{ +var_dump($result); +var_dump($match); +} + + +var_dump(preg_match_all('/\d+/', '123 456 789 012', $match, 0)); +var_dump($match); diff --git a/tests/phpt/dl/004_preg_replace.php b/tests/phpt/dl/004_preg_replace.php new file mode 100644 index 0000000000..8d5439a88d --- /dev/null +++ b/tests/phpt/dl/004_preg_replace.php @@ -0,0 +1,91 @@ +@ok callback benchmark +~', 'This is no more', $v)); var_dump ($v); -var_dump (preg_match_all ('~.*?~', 'This', $v)); var_dump ($v); -var_dump (preg_match_all ('~.*~', 'This', $v)); var_dump ($v); - -var_dump (preg_match_all ('~<.*?>~', 'This is no more', $v)); var_dump ($v); - -var_dump (preg_match_all ('~(\d+|)~', '12|34|567|', $v)); var_dump ($v); -var_dump (preg_match_all ('~(\d+\|)~', '12|34|567|', $v)); var_dump ($v); -var_dump (preg_match_all ('~((\d)+\|)~', '12|34|567|', $v)); var_dump ($v); -var_dump (preg_match_all ('~((\d)+\|)+~', '12|34|567|', $v)); var_dump ($v); -var_dump (preg_match_all ('~((\d+)\|)+~', '12|34|567|', $v)); var_dump ($v); - -var_dump (preg_replace ('~|q~', '{\0}', 'eq')); -var_dump (preg_replace ('~|q~', '{\0}', 'ex')); - -var_dump (preg_replace ('~|q~', 'w', 'e')); -var_dump (preg_replace ('~|q~', 'w', 'q')); -/* bug in PHP -var_dump (preg_replace ('~|й~u', 'п', 'р')); -*/ -var_dump (preg_replace ('~|й~u', 'п', 'й')); var_dump (preg_split ('~|й~u', 'п')); var_dump (preg_split ('~|й~u', 'й')); -define('RE_URL_PATTERN', '(?bold textclick me"; - -preg_match_all("/(<([\w]+)[^>]*>)(.*?)(<\/\\2>)/", $html, $matches, PREG_SET_ORDER); - -foreach ($matches as $val) { - echo "matched: " . $val[0] . "\n"; - echo "part 1: " . $val[1] . "\n"; - echo "part 2: " . $val[2] . "\n"; - echo "part 3: " . $val[3] . "\n"; - echo "part 4: " . $val[4] . "\n\n"; -} - -preg_match_all("/\(? (\d{3})? \)? (?(1) [\-\s] ) \d{3}-\d{4}/x", - "Call 555-1212 or 1-800-555-1212", $phones); - -$str = <<\w+): (?P\d+)/', $str, $matches); - -print_r($matches); - $keywords = preg_split("/[\s,]+/", "hypertext language, programming"); print_r($keywords); @@ -124,58 +16,6 @@ function prcConvertHyperref($matches) { $chars = preg_split('/ /', $str, -1, PREG_SPLIT_OFFSET_CAPTURE); print_r($chars); -var_dump (preg_replace ('~a|~', 'b', 'a')); -var_dump (preg_replace ('~a|~', 'a', 'b')); - -$str0 = <<1)?(?P[a-c]+):() (?P\d+)(?Pa)?)', '=A=i', "/(a)?/") as $pattern) { - foreach (array('((1)?([a-c]+):() (\d+)(a)?)', '=A=i', "/(a)?/") as $pattern) { - foreach (array($str0, '', "a", "1abAcaba", "dad") as $str) { - preg_match_all ($pattern, $str, $matches, PREG_PATTERN_ORDER | PREG_OFFSET_CAPTURE); - if ($i == 0) { - var_dump ("preg_match_all($pattern, $str, PREG_PATTERN_ORDER | PREG_OFFSET_CAPTURE)"); - var_dump ($matches); - } - - preg_match_all ($pattern, $str, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE); - if ($i == 0) { - var_dump ("preg_match_all ($pattern, $str, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)"); - var_dump ($matches); - } - - preg_match_all ($pattern, $str, $matches, PREG_PATTERN_ORDER); - if ($i == 0) { - var_dump ("preg_match_all ($pattern, $str, PREG_PATTERN_ORDER)"); - var_dump ($matches); - } - - preg_match_all ($pattern, $str, $matches, PREG_SET_ORDER); - if ($i == 0) { - var_dump ("preg_match_all ($pattern, $str, PREG_SET_ORDER)"); - var_dump ($matches); - } - - preg_match ($pattern, $str, $matches, PREG_OFFSET_CAPTURE); - if ($i == 0) { - var_dump ("preg_match ($pattern, $str, PREG_OFFSET_CAPTURE)"); - var_dump ($matches); - } - - preg_match ($pattern, $str, $matches); - if ($i == 0) { - var_dump ("preg_match ($pattern, $str)"); - var_dump ($matches); - } - } - } -} - foreach (array('2006-05-13', '06-12-12', 'data: "12-Aug-87"') as $s) { var_dump(preg_match('~ (?P @@ -187,123 +27,6 @@ function prcConvertHyperref($matches) { var_dump($m); } -foreach (array(PREG_PATTERN_ORDER, PREG_SET_ORDER) as $flag) { - var_dump(preg_match_all('~ - (?P - (?P(\d{2})?\d\d) - - (?P(?:\d\d|[a-zA-Z]{2,3})) - - (?P[0-3]?\d)) - ~x', - '2006-05-13 e outra data: "12-Aug-37"', $m, $flag)); - - var_dump($m); -} - - -var_dump(preg_match_all('/((?:(?:unsigned|struct)\s+)?\w+)(?:\s*(\*+)\s+|\s+(\**))(\w+(?:\[\s*\w*\s*\])?)\s*(?:(=)[^,;]+)?((?:\s*,\s*\**\s*\w+(?:\[\s*\w*\s*\])?\s*(?:=[^,;]+)?)*)\s*;/', 'unsigned int xpto = 124; short a, b;', $m, PREG_SET_ORDER)); -var_dump($m); - -var_dump(preg_match_all('/(?:\([^)]+\))?(&?)([\w>.()-]+(?:\[\w+\])?)\s*,?((?:\)*\s*=)?)/', '&a, b, &c', $m, PREG_SET_ORDER)); -var_dump($m); - -var_dump(preg_match_all('/zend_parse_parameters(?:_ex\s*\([^,]+,[^,]+|\s*\([^,]+),\s*"([^"]*)"\s*,\s*([^{;]*)/', 'zend_parse_parameters( 0, "addd|s/", a, b, &c);', $m, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)); -var_dump($m); - -var_dump(preg_replace(array('@//.*@', '@/\*.*\*/@sU'), array('', 'preg_replace("/[^\r\n]+/", "", \'$0\')'), "hello\n//x \n/*\ns\n*/")); - -$sampledata = " -/p2/var/php_gcov/PHP_4_4/ext/ming/ming.c: In function `zif_swfbitmap_init': -/p2/var/php_gcov/PHP_4_4/ext/ming/ming.c:323: warning: assignment from incompatible pointer type -/p2/var/php_gcov/PHP_4_4/ext/ming/ming.c: In function `zif_swftextfield_setFont': -/p2/var/php_gcov/PHP_4_4/ext/ming/ming.c:2597: warning: passing arg 2 of `SWFTextField_setFont' from incompatible pointer type -/p2/var/php_gcov/PHP_4_4/ext/oci8/oci8.c:1027: warning: `oci_ping' defined but not used -/p2/var/php_gcov/PHP_4_4/ext/posix/posix.c: In function `zif_posix_getpgid': -/p2/var/php_gcov/PHP_4_4/ext/posix/posix.c:484: warning: implicit declaration of function `getpgid' -/p2/var/php_gcov/PHP_4_4/ext/posix/posix.c: In function `zif_posix_getsid': -/p2/var/php_gcov/PHP_4_4/ext/posix/posix.c:506: warning: implicit declaration of function `getsid' -/p2/var/php_gcov/PHP_4_4/ext/session/mod_files.c: In function `ps_read_files': -/p2/var/php_gcov/PHP_4_4/ext/session/mod_files.c:302: warning: implicit declaration of function `pread' -/p2/var/php_gcov/PHP_4_4/ext/session/mod_files.c: In function `ps_write_files': -/p2/var/php_gcov/PHP_4_4/ext/session/mod_files.c:340: warning: implicit declaration of function `pwrite' -/p2/var/php_gcov/PHP_4_4/ext/sockets/sockets.c: In function `zif_socket_get_option': -/p2/var/php_gcov/PHP_4_4/ext/sockets/sockets.c:1862: warning: unused variable `timeout' -/p2/var/php_gcov/PHP_4_4/ext/sockets/sockets.c: In function `zif_socket_set_option': -/p2/var/php_gcov/PHP_4_4/ext/sockets/sockets.c:1941: warning: unused variable `timeout' -/p2/var/php_gcov/PHP_4_4/regex/regexec.c:19: warning: `nope' defined but not used -/p2/var/php_gcov/PHP_4_4/ext/standard/exec.c:50: warning: `php_make_safe_mode_command' defined but not used -/p2/var/php_gcov/PHP_4_4/ext/standard/image.c: In function `php_handle_jpc': -/p2/var/php_gcov/PHP_4_4/ext/standard/image.c:604: warning: unused variable `dummy_int' -/p2/var/php_gcov/PHP_4_4/ext/standard/parsedate.c: In function `php_gd_parse': -/p2/var/php_gcov/PHP_4_4/ext/standard/parsedate.c:1138: warning: implicit declaration of function `php_gd_lex' -/p2/var/php_gcov/PHP_4_4/ext/standard/parsedate.y: At top level: -/p2/var/php_gcov/PHP_4_4/ext/standard/parsedate.y:864: warning: return type defaults to `int' -/p2/var/php_gcov/PHP_4_4/ext/sysvmsg/sysvmsg.c: In function `zif_msg_receive': -/p2/var/php_gcov/PHP_4_4/ext/sysvmsg/sysvmsg.c:318: warning: passing arg 2 of `php_var_unserialize' from incompatible pointer type -/p2/var/php_gcov/PHP_4_4/ext/yp/yp.c: In function `zif_yp_err_string': -/p2/var/php_gcov/PHP_4_4/ext/yp/yp.c:372: warning: assignment discards qualifiers from pointer target type -Zend/zend_language_scanner.c:5944: warning: `yy_fatal_error' defined but not used -Zend/zend_language_scanner.c:2627: warning: `yy_last_accepting_state' defined but not used -Zend/zend_language_scanner.c:2628: warning: `yy_last_accepting_cpos' defined but not used -Zend/zend_language_scanner.c:2634: warning: `yy_more_flag' defined but not used -Zend/zend_language_scanner.c:2635: warning: `yy_more_len' defined but not used -Zend/zend_language_scanner.c:5483: warning: `yyunput' defined but not used -Zend/zend_language_scanner.c:5929: warning: `yy_top_state' defined but not used -conflicts: 2 shift/reduce -Zend/zend_ini_scanner.c:457: warning: `yy_last_accepting_state' defined but not used -Zend/zend_ini_scanner.c:458: warning: `yy_last_accepting_cpos' defined but not used -Zend/zend_ini_scanner.c:1361: warning: `yyunput' defined but not used -/p2/var/php_gcov/PHP_4_4/Zend/zend_alloc.c: In function `_safe_emalloc': -/p2/var/php_gcov/PHP_4_4/Zend/zend_alloc.c:237: warning: long int format, size_t arg (arg 3) -/p2/var/php_gcov/PHP_4_4/Zend/zend_alloc.c:237: warning: long int format, size_t arg (arg 4) -/p2/var/php_gcov/PHP_4_4/Zend/zend_alloc.c:237: warning: long int format, size_t arg (arg 5) -/p2/var/php_gcov/PHP_4_4/Zend/zend_ini.c:338: warning: `zend_ini_displayer_cb' defined but not used -ext/mysql/libmysql/my_tempnam.o(.text+0x80): In function `my_tempnam': -/p2/var/php_gcov/PHP_4_4/ext/mysql/libmysql/my_tempnam.c:115: warning: the use of `tempnam' is dangerous, better use `mkstemp' -ext/mysql/libmysql/my_tempnam.o(.text+0x80): In function `my_tempnam': -/p2/var/php_gcov/PHP_4_4/ext/mysql/libmysql/my_tempnam.c:115: warning: the use of `tempnam' is dangerous, better use `mkstemp' -ext/ming/ming.o(.text+0xc115): In function `zim_swfmovie_namedAnchor': -/p2/var/php_gcov/PHP_5_2/ext/ming/ming.c:2207: undefined reference to `SWFMovie_namedAnchor' -/p2/var/php_gcov/PHP_5_2/ext/ming/ming.c:2209: undefined reference to `SWFMovie_xpto' -/p2/var/php_gcov/PHP_5_2/ext/ming/ming.c:2259: undefined reference to `SWFMovie_foo' -ext/ming/ming.o(.text+0x851): In function `zif_ming_setSWFCompression': -/p2/var/php_gcov/PHP_5_2/ext/ming/ming.c:154: undefined reference to `Ming_setSWFCompression' -"; - -$gcc_regex = '/^((.+)(\(\.text\+0x[[:xdigit:]]+\))?: In function [`\'](\w+)\':\s+)?'. - '((?(1)(?(3)[^:\n]+|\2)|[^:\n]+)):(\d+): (?:(error|warning):\s+)?(.+)'. - str_repeat('(?:\s+\5:(\d+): (?:(error|warning):\s+)?(.+))?', 99). // capture up to 100 errors - '/m'; - - -var_dump(preg_match_all($gcc_regex, $sampledata, $m, PREG_SET_ORDER)); -print_r($m); - - -/** - * @kphp-required - * @param string[] $param - * @return string - */ -function cb($param) { - var_dump($param); - return "yes!"; -} - -#var_dump(preg_replace('', array(), '')); - -var_dump(preg_match_all('|(\w+)://([^\s"<]*[\w+#?/&=])|', "This is a text string", $matches, PREG_SET_ORDER)); -var_dump($matches); - -/** - * @return mixed - */ -function func1(){ - $string = 'what the word and the other word the'; - preg_match_all('/(?Pthe)/', $string, $matches); - return $matches['word']; -} -$words = func1(); -var_dump($words); $foo = 'bla bla bla'; @@ -318,18 +41,6 @@ function func1(){ var_dump(preg_match('@^(/([a-z]+))+$@', $subject, $m)); var_dump($m); var_dump(preg_match('@^(/(?:[a-z]+))+$@', $subject, $m)); var_dump($m); -$pattern = -"/\s([\w_\.\/]+)(?:=([\'\"]?(?:[\w\d\s\?=\(\)\.,'_#\/\\:;&-]|(?:\\\\\"|\\\')?)+[\'\"]?))?/"; -$context = ""; - -$match = array(); - -if ($result = preg_match_all($pattern, $context, $match)) -{ -var_dump($result); -var_dump($match); -} - $regex = '/(insert|drop|create|select|delete|update)([^;\']*('."('[^']*')+".')?)*(;|$)/i'; $sql = 'SELECT * FROM #__components'; @@ -343,24 +54,12 @@ function func1(){ */ - -var_dump(preg_replace(array('/\da(.)/ui', '@..@'), '$1', '12Abc')); -var_dump(preg_replace(array('/\da(.)/ui', '@(.)@'), '$1', array('x','a2aA', '1av2Ab'))); - - -var_dump(preg_replace(array('/[\w]+/'), array('$'), array('xyz', 'bdbd'))); -var_dump(preg_replace(array('/\s+/', '~[b-d]~'), array('$'), array('x y', 'bd bc'))); - - var_dump(preg_match('/\d+/', '123 456 789 012', $match, 0)); var_dump($match); var_dump(preg_match('/\d+/', '123 456 789 012', $match, 0)); var_dump($match); -var_dump(preg_match_all('/\d+/', '123 456 789 012', $match, 0)); -var_dump($match); - var_dump(preg_split('/PHP_(?:NAMED_)?(?:FUNCTION|METHOD)\s*\((\w+(?:,\s*\w+)?)\)/', "PHP_FUNCTION(s, preg_match)\n{\nlalala", -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_OFFSET_CAPTURE)); @@ -384,80 +83,3 @@ function func1(){ var_dump(preg_split('/(\d*)/', 'ab2c3u', -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE)); var_dump(preg_split('/(\d*)/', 'ab2c3u', -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_OFFSET_CAPTURE)); var_dump(preg_split('/(\d*)/', 'ab2c3u', -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_OFFSET_CAPTURE)); - - - - - -/* -PHP 5.2.0 - 5.3.6 bug -$text = '[CODE]<td align="$stylevar[right]">[/CODE]'; -$result = preg_replace(array('#\[(right)\](((?R)|[^[]+?|\[)*)\[/\\1\]#siU', '#\[(right)\](((?R)|[^[]+?|\[)*)\[/\\1\]#siU'), '', $text); -var_dump($text); -var_dump($result); - -$result = preg_replace('#\[(right)\](((?R)|[^[]+?|\[)*)\[/\\1\]#siU', '', $text); -var_dump($text); -var_dump($result); -*/ - - - - - - - -$input = "plain [indent] deep [indent] [abcd]deeper[/abcd] [/indent] deep [/indent] plain"; - -/** - * @param mixed $input - * @return string - */ -function parseTagsRecursive($input) -{ - global $count; - $regex = '#\[indent]((?:[^[]|\[(?!/?indent])|(?R))+)\[/indent]#'; - - if (is_array($input)) { - $input = '
'.$input[1].'
'; - } - - - $res = preg_replace_callback($regex, 'parseTagsRecursive', $input, -1, $count); - var_dump ($count); - return (string)$res; - -} - -$output = parseTagsRecursive($input); - -echo $output, "\n"; - - -/** - * @kphp-required - * @param string[] $x - * @return string - */ -function g($x) { - return "'{$x[0]}'"; -} - -var_dump(preg_replace_callback('@\b\w{1,2}\b@', 'g', array('a b3 bcd', 'v' => 'aksfjk', 12 => 'aa bb'))); - -@var_dump(preg_replace_callback('~\A.~', 'g', array(array('xyz')))); - -/** - * @kphp-required - * @param string[] $m - * @return string - */ -function tmp($m) { - return strtolower($m[0]); -} - -var_dump(preg_replace_callback('~\A.~', 'tmp', 'ABC')); - -var_dump(preg_replace_callback("/(ab)(cd)(e)/", "cb", 'abcde')); - -