From 6c8e333a5770e9044470db18bee8fe1be9911795 Mon Sep 17 00:00:00 2001 From: Alexander Polyakov Date: Mon, 4 Nov 2024 16:55:43 +0300 Subject: [PATCH] Get rid of most of global variables in regexp --- runtime-common/stdlib/string/regexp-context.h | 37 +++++++ .../stdlib/string/regexp-functions.h | 88 ++++++++------- runtime/regexp-context.cpp | 11 ++ runtime/regexp.cpp | 104 ++++++++++-------- runtime/runtime.cmake | 1 + 5 files changed, 150 insertions(+), 91 deletions(-) create mode 100644 runtime-common/stdlib/string/regexp-context.h create mode 100644 runtime/regexp-context.cpp diff --git a/runtime-common/stdlib/string/regexp-context.h b/runtime-common/stdlib/string/regexp-context.h new file mode 100644 index 000000000..1d8247b97 --- /dev/null +++ b/runtime-common/stdlib/string/regexp-context.h @@ -0,0 +1,37 @@ +// Compiler for PHP (aka KPHP) +// Copyright (c) 2024 LLC «V Kontakte» +// Distributed under the GPL v3 License, see LICENSE.notice.txt + +#pragma once + +#include +#include +#include + +#include +#include + +#include "common/mixin/not_copyable.h" +#include "runtime-common/core/runtime-core.h" + +class regexp; + +struct RegexpContext final : private vk::not_copyable { + static constexpr size_t MAX_SUBPATTERNS = 512; + + pcre_extra extra{}; + int64_t pcre_last_error{}; + int64_t preg_replace_count_dummy{}; + int64_t regexp_last_query_num{-1}; + // refactor me please :( + // for i-th match(capturing group) + // submatch[2 * i] - start position of match + // submatch[2 * i + 1] - end position of match + std::array submatch{}; + std::array RE2_submatch{}; + + std::array)> regexp_cache_storage{}; + array *regexp_cache{reinterpret_cast *>(regexp_cache_storage.data())}; + + static RegexpContext &get() noexcept; +}; diff --git a/runtime-common/stdlib/string/regexp-functions.h b/runtime-common/stdlib/string/regexp-functions.h index 1e9f507da..59612cc52 100644 --- a/runtime-common/stdlib/string/regexp-functions.h +++ b/runtime-common/stdlib/string/regexp-functions.h @@ -10,27 +10,23 @@ #include "runtime-common/core/runtime-core.h" #include "runtime-common/stdlib/string/mbstring-functions.h" +#include "runtime-common/stdlib/string/regexp-context.h" #include "runtime-common/stdlib/tracing/tracing.h" -#include "runtime/context/runtime-context.h" namespace re2 { class RE2; } // namespace re2 -extern int64_t preg_replace_count_dummy; +inline constexpr int64_t PREG_PATTERN_ORDER = 1; +inline constexpr int64_t PREG_SET_ORDER = 2; +inline constexpr int64_t PREG_OFFSET_CAPTURE = 4; -constexpr int64_t PREG_PATTERN_ORDER = 1; -constexpr int64_t PREG_SET_ORDER = 2; -constexpr int64_t PREG_OFFSET_CAPTURE = 4; +inline constexpr int64_t PREG_SPLIT_NO_EMPTY = 8; +inline constexpr int64_t PREG_SPLIT_DELIM_CAPTURE = 16; +inline constexpr int64_t PREG_SPLIT_OFFSET_CAPTURE = 32; -constexpr int64_t PREG_SPLIT_NO_EMPTY = 8; -constexpr int64_t PREG_SPLIT_DELIM_CAPTURE = 16; -constexpr int64_t PREG_SPLIT_OFFSET_CAPTURE = 32; - -constexpr int64_t PCRE_RECURSION_LIMIT = 100000; -constexpr int64_t PCRE_BACKTRACK_LIMIT = 1000000; - -constexpr int32_t MAX_SUBPATTERNS = 512; +inline constexpr int64_t PCRE_RECURSION_LIMIT = 100000; +inline constexpr int64_t PCRE_BACKTRACK_LIMIT = 1000000; enum { PHP_PCRE_NO_ERROR = 0, @@ -61,12 +57,6 @@ class regexp : vk::not_copyable { bool is_valid_RE2_regexp(const char *regexp_string, int64_t regexp_len, bool is_utf8, const char *function, const char *file) noexcept; - static pcre_extra extra; - - static int64_t pcre_last_error; - - static int32_t submatch[3 * MAX_SUBPATTERNS]; - template inline string get_replacement(const T &replace_val, const string &subject, int64_t count) const; @@ -150,58 +140,60 @@ inline Optional f$preg_match(const mixed ®ex, const string &subject, inline Optional f$preg_match_all(const mixed ®ex, const string &subject, mixed &matches, int64_t flags, int64_t offset = 0); template> -inline auto f$preg_replace(const T1 ®ex, const T2 &replace_val, const T3 &subject, int64_t limit = -1, int64_t &replace_count = preg_replace_count_dummy); +inline auto f$preg_replace(const T1 ®ex, const T2 &replace_val, const T3 &subject, int64_t limit = -1, + int64_t &replace_count = RegexpContext::get().preg_replace_count_dummy); inline Optional f$preg_replace(const regexp ®ex, const string &replace_val, const string &subject, int64_t limit = -1, - int64_t &replace_count = preg_replace_count_dummy); + int64_t &replace_count = RegexpContext::get().preg_replace_count_dummy); inline Optional f$preg_replace(const regexp ®ex, const mixed &replace_val, const string &subject, int64_t limit = -1, - int64_t &replace_count = preg_replace_count_dummy); + int64_t &replace_count = RegexpContext::get().preg_replace_count_dummy); inline mixed f$preg_replace(const regexp ®ex, const string &replace_val, const mixed &subject, int64_t limit = -1, - int64_t &replace_count = preg_replace_count_dummy); + int64_t &replace_count = RegexpContext::get().preg_replace_count_dummy); inline mixed f$preg_replace(const regexp ®ex, const mixed &replace_val, const mixed &subject, int64_t limit = -1, - int64_t &replace_count = preg_replace_count_dummy); + int64_t &replace_count = RegexpContext::get().preg_replace_count_dummy); template inline auto f$preg_replace(const string ®ex, const T1 &replace_val, const T2 &subject, int64_t limit = -1, - int64_t &replace_count = preg_replace_count_dummy); + int64_t &replace_count = RegexpContext::get().preg_replace_count_dummy); inline Optional f$preg_replace(const mixed ®ex, const string &replace_val, const string &subject, int64_t limit = -1, - int64_t &replace_count = preg_replace_count_dummy); + int64_t &replace_count = RegexpContext::get().preg_replace_count_dummy); inline mixed f$preg_replace(const mixed ®ex, const string &replace_val, const mixed &subject, int64_t limit = -1, - int64_t &replace_count = preg_replace_count_dummy); + int64_t &replace_count = RegexpContext::get().preg_replace_count_dummy); inline Optional f$preg_replace(const mixed ®ex, const mixed &replace_val, const string &subject, int64_t limit = -1, - int64_t &replace_count = preg_replace_count_dummy); + int64_t &replace_count = RegexpContext::get().preg_replace_count_dummy); inline mixed f$preg_replace(const mixed ®ex, const mixed &replace_val, const mixed &subject, int64_t limit = -1, - int64_t &replace_count = preg_replace_count_dummy); + int64_t &replace_count = RegexpContext::get().preg_replace_count_dummy); template> -auto f$preg_replace_callback(const T1 ®ex, const T2 &replace_val, const T3 &subject, int64_t limit = -1, int64_t &replace_count = preg_replace_count_dummy); +auto f$preg_replace_callback(const T1 ®ex, const T2 &replace_val, const T3 &subject, int64_t limit = -1, + int64_t &replace_count = RegexpContext::get().preg_replace_count_dummy); template Optional f$preg_replace_callback(const regexp ®ex, const T &replace_val, const string &subject, int64_t limit = -1, - int64_t &replace_count = preg_replace_count_dummy); + int64_t &replace_count = RegexpContext::get().preg_replace_count_dummy); template mixed f$preg_replace_callback(const regexp ®ex, const T &replace_val, const mixed &subject, int64_t limit = -1, - int64_t &replace_count = preg_replace_count_dummy); + int64_t &replace_count = RegexpContext::get().preg_replace_count_dummy); template auto f$preg_replace_callback(const string ®ex, const T &replace_val, const T2 &subject, int64_t limit = -1, - int64_t &replace_count = preg_replace_count_dummy); + int64_t &replace_count = RegexpContext::get().preg_replace_count_dummy); template Optional f$preg_replace_callback(const mixed ®ex, const T &replace_val, const string &subject, int64_t limit = -1, - int64_t &replace_count = preg_replace_count_dummy); + int64_t &replace_count = RegexpContext::get().preg_replace_count_dummy); template mixed f$preg_replace_callback(const mixed ®ex, const T &replace_val, const mixed &subject, int64_t limit = -1, - int64_t &replace_count = preg_replace_count_dummy); + int64_t &replace_count = RegexpContext::get().preg_replace_count_dummy); inline Optional> f$preg_split(const regexp ®ex, const string &subject, int64_t limit = -1, int64_t flags = 0); @@ -221,8 +213,10 @@ inline int64_t f$preg_last_error(); template<> inline string regexp::get_replacement(const string &replace_val, const string &subject, int64_t count) const { + auto &runtime_ctx = RuntimeContext::get(); + runtime_ctx.static_SB.clean(); + const string::size_type len = replace_val.size(); - kphp_runtime_context.static_SB.clean(); for (string::size_type i = 0; i < len; i++) { int64_t backref = -1; if (replace_val[i] == '\\' && (replace_val[i + 1] == '\\' || replace_val[i + 1] == '$')) { @@ -250,21 +244,23 @@ inline string regexp::get_replacement(const string &replace_val, const string &s } if (backref == -1) { - kphp_runtime_context.static_SB << replace_val[i]; + runtime_ctx.static_SB << replace_val[i]; } else { if (backref < count) { int64_t index = backref + backref; - kphp_runtime_context.static_SB.append(subject.c_str() + submatch[index], static_cast(submatch[index + 1] - submatch[index])); + const auto &submatch = RegexpContext::get().submatch; + runtime_ctx.static_SB.append(subject.c_str() + submatch[index], static_cast(submatch[index + 1] - submatch[index])); } } } - return kphp_runtime_context.static_SB.str(); // TODO optimize + return runtime_ctx.static_SB.str(); // TODO optimize } template string regexp::get_replacement(const T &replace_val, const string &subject, const int64_t count) const { array result_set(array_size(count + named_subpatterns_count, named_subpatterns_count == 0)); + const auto &submatch = RegexpContext::get().submatch; if (named_subpatterns_count) { for (int64_t i = 0; i < count; i++) { const string match_str(subject.c_str() + submatch[i + i], submatch[i + i + 1] - submatch[i + i]); @@ -282,6 +278,7 @@ string regexp::get_replacement(const T &replace_val, const string &subject, cons template Optional regexp::replace(const T &replace_val, const string &subject, int64_t limit, int64_t &replace_count) const { + auto &pcre_last_error = RegexpContext::get().pcre_last_error; pcre_last_error = 0; int64_t result_count = 0; // calls can be recursive, can't write to replace_count directly @@ -323,6 +320,7 @@ Optional regexp::replace(const T &replace_val, const string &subject, in result_count++; limit--; + const auto &submatch = RegexpContext::get().submatch; int64_t match_begin = submatch[0]; offset = submatch[1]; @@ -493,7 +491,7 @@ mixed f$preg_replace(const regexp ®ex, const mixed &replace_val, const mixed if (subject.is_array()) { replace_count = 0; - int64_t replace_count_one; + int64_t replace_count_one = 0; const array &subject_arr = subject.as_array(""); array result(subject_arr.size()); for (array::const_iterator it = subject_arr.begin(); it != subject_arr.end(); ++it) { @@ -527,7 +525,7 @@ Optional f$preg_replace(const mixed ®ex, const mixed &replace_val, co Optional result = subject; replace_count = 0; - int64_t replace_count_one; + int64_t replace_count_one = 0; if (replace_val.is_array()) { array::const_iterator cur_replace_val = replace_val.begin(); @@ -565,7 +563,7 @@ Optional f$preg_replace(const mixed ®ex, const mixed &replace_val, co mixed f$preg_replace(const mixed ®ex, const mixed &replace_val, const mixed &subject, int64_t limit, int64_t &replace_count) { if (subject.is_array()) { replace_count = 0; - int64_t replace_count_one; + int64_t replace_count_one = 0; const array &subject_arr = subject.as_array(""); array result(subject_arr.size()); for (array::const_iterator it = subject_arr.begin(); it != subject_arr.end(); ++it) { @@ -603,7 +601,7 @@ mixed f$preg_replace_callback(const regexp ®ex, const T &replace_val, const m } if (subject.is_array()) { replace_count = 0; - int64_t replace_count_one; + int64_t replace_count_one = 0; const array &subject_arr = subject.as_array(""); array result(subject_arr.size()); for (array::const_iterator it = subject_arr.begin(); it != subject_arr.end(); ++it) { @@ -630,7 +628,7 @@ Optional f$preg_replace_callback(const mixed ®ex, const T &replace_va Optional result = subject; replace_count = 0; - int64_t replace_count_one; + int64_t replace_count_one = 0; for (array::const_iterator it = regex.begin(); it != regex.end(); ++it) { result = f$preg_replace_callback(it.get_value().to_string(), replace_val, result, limit, replace_count_one); @@ -647,7 +645,7 @@ template mixed f$preg_replace_callback(const mixed ®ex, const T &replace_val, const mixed &subject, int64_t limit, int64_t &replace_count) { if (subject.is_array()) { replace_count = 0; - int64_t replace_count_one; + int64_t replace_count_one = 0; const array &subject_arr = subject.as_array(""); array result(subject_arr.size()); for (array::const_iterator it = subject_arr.begin(); it != subject_arr.end(); ++it) { diff --git a/runtime/regexp-context.cpp b/runtime/regexp-context.cpp new file mode 100644 index 000000000..44e8ecfb1 --- /dev/null +++ b/runtime/regexp-context.cpp @@ -0,0 +1,11 @@ +// Compiler for PHP (aka KPHP) +// Copyright (c) 2024 LLC «V Kontakte» +// Distributed under the GPL v3 License, see LICENSE.notice.txt + +#include "runtime-common/stdlib/string/regexp-context.h" + +static RegexpContext regexp_context{}; + +RegexpContext &RegexpContext::get() noexcept { + return regexp_context; +} diff --git a/runtime/regexp.cpp b/runtime/regexp.cpp index 680133b2a..b4773c46a 100644 --- a/runtime/regexp.cpp +++ b/runtime/regexp.cpp @@ -1,23 +1,26 @@ // Compiler for PHP (aka KPHP) -// Copyright (c) 2020 LLC «V Kontakte» +// Copyright (c) 2024 LLC «V Kontakte» // Distributed under the GPL v3 License, see LICENSE.notice.txt #include "runtime-common/stdlib/string/regexp-functions.h" #include +#include + +#include #include #if ASAN_ENABLED #include #endif -#include "common/unicode/utf8-utils.h" +#include "common/unicode/utf8-utils.h" +#include "runtime-common/core/runtime-core.h" +#include "runtime-common/stdlib/string/regexp-context.h" #include "runtime/allocator.h" #include "runtime/critical_section.h" #include "server/php-engine-vars.h" #include "server/php-runner.h" -int64_t preg_replace_count_dummy; - // TODO: remove when/if we migrate to pcre2 #ifndef PCRE2_ERROR_BADOFFSET # define PCRE2_ERROR_BADOFFSET -33 @@ -26,14 +29,6 @@ int64_t preg_replace_count_dummy; # define PCRE2_UNSET -1 #endif -static re2::StringPiece RE2_submatch[MAX_SUBPATTERNS]; -// refactor me please :( -// for i-th match(capturing group) -// submatch[2 * i] - start position of match -// submatch[2 * i + 1] - end position of match -int32_t regexp::submatch[3 * MAX_SUBPATTERNS]; -pcre_extra regexp::extra; - static_assert(sizeof(regexp) == SIZEOF_REGEXP, "sizeof(regexp) at runtime doesn't match compile-time"); regexp::regexp(const string ®exp_string) { @@ -48,7 +43,7 @@ void regexp::pattern_compilation_warning(const char *function, const char *file, va_list args; va_start (args, message); char buf[1024]; - vsnprintf(buf, sizeof(buf), message, args); + std::ignore = vsnprintf(buf, sizeof(buf), message, args); va_end (args); if (function || file) { @@ -315,7 +310,7 @@ bool regexp::is_valid_RE2_regexp(const char *regexp_string, int64_t regexp_len, void regexp::init(const string ®exp_string, const char *function, const char *file) { static char regexp_cache_storage[sizeof(array)]; - static array *regexp_cache = (array *)regexp_cache_storage; + static auto *regexp_cache = reinterpret_cast *>(regexp_cache_storage); static long long regexp_last_query_num = -1; use_heap_memory = !(php_script.has_value() && php_script->is_running()); @@ -347,7 +342,7 @@ void regexp::init(const string ®exp_string, const char *function, const char init(regexp_string.c_str(), regexp_string.size(), function, file); if (!use_heap_memory) { - regexp *re = static_cast (dl::allocate(sizeof(regexp))); + auto *re = static_cast (dl::allocate(sizeof(regexp))); new(re) regexp(); re->subpatterns_count = subpatterns_count; @@ -370,7 +365,8 @@ void regexp::init(const char *regexp_string, int64_t regexp_len, const char *fun return; } - char start_delimiter = regexp_string[0], end_delimiter; + char start_delimiter = regexp_string[0]; + char end_delimiter = 0; switch (start_delimiter) { case '(': end_delimiter = ')'; @@ -413,7 +409,8 @@ void regexp::init(const char *regexp_string, int64_t regexp_len, const char *fun return; } - kphp_runtime_context.static_SB.clean().append(regexp_string + 1, static_cast(regexp_end - 1)); + auto &runtime_ctx = RuntimeContext::get(); + runtime_ctx.static_SB.clean().append(regexp_string + 1, static_cast(regexp_end - 1)); use_heap_memory = !(php_script.has_value() && php_script->is_running()); @@ -479,22 +476,22 @@ void regexp::init(const char *regexp_string, int64_t regexp_len, const char *fun } } - can_use_RE2 = can_use_RE2 && is_valid_RE2_regexp(kphp_runtime_context.static_SB.c_str(), kphp_runtime_context.static_SB.size(), is_utf8, function, file); + can_use_RE2 = can_use_RE2 && is_valid_RE2_regexp(runtime_ctx.static_SB.c_str(), runtime_ctx.static_SB.size(), is_utf8, function, file); - if (is_utf8 && !mb_UTF8_check(kphp_runtime_context.static_SB.c_str())) { - pattern_compilation_warning(function, file, "Regexp \"%s\" contains not UTF-8 symbols", kphp_runtime_context.static_SB.c_str()); + if (is_utf8 && !mb_UTF8_check(runtime_ctx.static_SB.c_str())) { + pattern_compilation_warning(function, file, "Regexp \"%s\" contains not UTF-8 symbols", runtime_ctx.static_SB.c_str()); clean(); return; } bool need_pcre = false; if (can_use_RE2) { - RE2_regexp = new RE2(re2::StringPiece(kphp_runtime_context.static_SB.c_str(), kphp_runtime_context.static_SB.size()), RE2_options); + RE2_regexp = new RE2(re2::StringPiece(runtime_ctx.static_SB.c_str(), runtime_ctx.static_SB.size()), RE2_options); #if ASAN_ENABLED __lsan_ignore_object(RE2_regexp); #endif if (!RE2_regexp->ok()) { - pattern_compilation_warning(function, file, "RE2 compilation of regexp \"%s\" failed. Error %d at %s", kphp_runtime_context.static_SB.c_str(), RE2_regexp->error_code(), RE2_regexp->error().c_str()); + pattern_compilation_warning(function, file, "RE2 compilation of regexp \"%s\" failed. Error %d at %s", runtime_ctx.static_SB.c_str(), RE2_regexp->error_code(), RE2_regexp->error().c_str()); delete RE2_regexp; RE2_regexp = nullptr; @@ -512,9 +509,9 @@ void regexp::init(const char *regexp_string, int64_t regexp_len, const char *fun } if (RE2_regexp == nullptr || need_pcre) { - const char *error; + const char *error = nullptr; int32_t erroffset = 0; - pcre_regexp = pcre_compile(kphp_runtime_context.static_SB.c_str(), pcre_options, &error, &erroffset, nullptr); + pcre_regexp = pcre_compile(runtime_ctx.static_SB.c_str(), pcre_options, &error, &erroffset, nullptr); #if ASAN_ENABLED __lsan_ignore_object(pcre_regexp); #endif @@ -545,11 +542,11 @@ void regexp::init(const char *regexp_string, int64_t regexp_len, const char *fun int32_t name_entry_size = 0; php_assert (pcre_fullinfo(pcre_regexp, nullptr, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size) == 0); - char *name_table; + char *name_table = nullptr; php_assert (pcre_fullinfo(pcre_regexp, nullptr, PCRE_INFO_NAMETABLE, &name_table) == 0); for (int64_t i = 0; i < named_subpatterns_count; i++) { - int64_t name_id = (((unsigned char)name_table[0]) << 8) + (unsigned char)name_table[1]; + int64_t name_id = ((static_cast(name_table[0])) << 8) + static_cast(name_table[1]); string name(name_table + 2); if (use_heap_memory) { @@ -568,8 +565,8 @@ void regexp::init(const char *regexp_string, int64_t regexp_len, const char *fun } subpatterns_count++; - if (subpatterns_count > MAX_SUBPATTERNS) { - pattern_compilation_warning(function, file, "Maximum number of subpatterns %d exceeded, %d subpatterns found", MAX_SUBPATTERNS, subpatterns_count); + if (subpatterns_count > RegexpContext::MAX_SUBPATTERNS) { + pattern_compilation_warning(function, file, "Maximum number of subpatterns %zu exceeded, %d subpatterns found", RegexpContext::MAX_SUBPATTERNS, subpatterns_count); subpatterns_count = 0; delete RE2_regexp; @@ -614,17 +611,18 @@ regexp::~regexp() { } } - -int64_t regexp::pcre_last_error; - int64_t regexp::exec(const string &subject, int64_t offset, bool second_try) const { + auto ®exp_ctx = RegexpContext::get(); + auto &submatch = regexp_ctx.submatch; + auto &RE2_submatch = regexp_ctx.RE2_submatch; + if (RE2_regexp && !second_try) { { dl::CriticalSectionGuard critical_section; auto malloc_replacement_guard = make_malloc_replacement_with_script_allocator(!use_heap_memory); re2::StringPiece text(subject.c_str(), subject.size()); - bool matched = RE2_regexp->Match(text, static_cast(offset), subject.size(), RE2::UNANCHORED, RE2_submatch, subpatterns_count); + bool matched = RE2_regexp->Match(text, static_cast(offset), subject.size(), RE2::UNANCHORED, RE2_submatch.data(), subpatterns_count); if (!matched) { return 0; } @@ -650,8 +648,8 @@ int64_t regexp::exec(const string &subject, int64_t offset, bool second_try) con int32_t options = second_try ? PCRE_NO_UTF8_CHECK | PCRE_NOTEMPTY_ATSTART : PCRE_NO_UTF8_CHECK; dl::enter_critical_section();//OK - int64_t count = pcre_exec(pcre_regexp, &extra, subject.c_str(), subject.size(), - static_cast(offset), options, submatch, 3 * subpatterns_count); + int64_t count = pcre_exec(pcre_regexp, ®exp_ctx.extra, subject.c_str(), subject.size(), + static_cast(offset), options, submatch.data(), 3 * subpatterns_count); dl::leave_critical_section(); php_assert (count != 0); @@ -659,7 +657,7 @@ int64_t regexp::exec(const string &subject, int64_t offset, bool second_try) con return 0; } if (count < 0) { - pcre_last_error = count; + regexp_ctx.pcre_last_error = count; return 0; } @@ -668,6 +666,9 @@ int64_t regexp::exec(const string &subject, int64_t offset, bool second_try) con Optional regexp::match(const string &subject, bool all_matches) const { + auto ®exp_ctx = RegexpContext::get(); + auto &submatch = regexp_ctx.submatch; + auto &pcre_last_error = regexp_ctx.pcre_last_error; pcre_last_error = 0; check_pattern_compilation_warning(); @@ -715,6 +716,9 @@ Optional regexp::match(const string &subject, bool all_matches) const { } Optional regexp::match(const string &subject, mixed &matches, bool all_matches, int64_t offset) const { + auto ®exp_ctx = RegexpContext::get(); + auto &submatch = regexp_ctx.submatch; + auto &pcre_last_error = regexp_ctx.pcre_last_error; pcre_last_error = 0; check_pattern_compilation_warning(); @@ -814,6 +818,9 @@ Optional regexp::match(const string &subject, mixed &matches, bool all_ } Optional regexp::match(const string &subject, mixed &matches, int64_t flags, bool all_matches, int64_t offset) const { + auto ®exp_ctx = RegexpContext::get(); + auto &submatch = regexp_ctx.submatch; + auto &pcre_last_error = regexp_ctx.pcre_last_error; pcre_last_error = 0; check_pattern_compilation_warning(); @@ -960,6 +967,9 @@ Optional regexp::match(const string &subject, mixed &matches, int64_t f } Optional> regexp::split(const string &subject, int64_t limit, int64_t flags) const { + auto ®exp_ctx = RegexpContext::get(); + auto &submatch = regexp_ctx.submatch; + auto &pcre_last_error = regexp_ctx.pcre_last_error; pcre_last_error = 0; check_pattern_compilation_warning(); @@ -1055,7 +1065,7 @@ Optional> regexp::split(const string &subject, int64_t limit, int64 } int64_t regexp::last_error() { - switch (pcre_last_error) { + switch (RegexpContext::get().pcre_last_error) { case PHP_PCRE_NO_ERROR: return PHP_PCRE_NO_ERROR; case PCRE_ERROR_MATCHLIMIT: @@ -1078,7 +1088,8 @@ int64_t regexp::last_error() { string f$preg_quote(const string &str, const string &delimiter) { const string::size_type len = str.size(); - kphp_runtime_context.static_SB.clean().reserve(4 * len); + auto &runtime_ctx = RuntimeContext::get(); + runtime_ctx.static_SB.clean().reserve(4 * len); for (string::size_type i = 0; i < len; i++) { switch (str[i]) { @@ -1103,28 +1114,29 @@ string f$preg_quote(const string &str, const string &delimiter) { case ':': case '-': case '#': - kphp_runtime_context.static_SB.append_char('\\'); - kphp_runtime_context.static_SB.append_char(str[i]); + runtime_ctx.static_SB.append_char('\\'); + runtime_ctx.static_SB.append_char(str[i]); break; case '\0': - kphp_runtime_context.static_SB.append_char('\\'); - kphp_runtime_context.static_SB.append_char('0'); - kphp_runtime_context.static_SB.append_char('0'); - kphp_runtime_context.static_SB.append_char('0'); + runtime_ctx.static_SB.append_char('\\'); + runtime_ctx.static_SB.append_char('0'); + runtime_ctx.static_SB.append_char('0'); + runtime_ctx.static_SB.append_char('0'); break; default: if (!delimiter.empty() && str[i] == delimiter[0]) { - kphp_runtime_context.static_SB.append_char('\\'); + runtime_ctx.static_SB.append_char('\\'); } - kphp_runtime_context.static_SB.append_char(str[i]); + runtime_ctx.static_SB.append_char(str[i]); break; } } - return kphp_runtime_context.static_SB.str(); + return runtime_ctx.static_SB.str(); } void regexp::global_init() { + auto &extra = RegexpContext::get().extra; extra.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; extra.match_limit = PCRE_BACKTRACK_LIMIT; extra.match_limit_recursion = PCRE_RECURSION_LIMIT; diff --git a/runtime/runtime.cmake b/runtime/runtime.cmake index da3018eed..e64377d7d 100644 --- a/runtime/runtime.cmake +++ b/runtime/runtime.cmake @@ -105,6 +105,7 @@ prepend(KPHP_RUNTIME_SOURCES ${BASE_DIR}/runtime/ php-script-globals.cpp profiler.cpp regexp.cpp + regexp-context.cpp resumable.cpp rpc.cpp rpc_extra_info.cpp