From 791f4425684b12454906c0f4d147bb55a428c2e3 Mon Sep 17 00:00:00 2001 From: Alexander Polyakov Date: Fri, 1 Nov 2024 17:14:04 +0300 Subject: [PATCH] Move most of string functions into runtime-common/stdlib (#1130) --- .../core/allocator/runtime-allocator.h | 21 +- .../core/allocator/script-allocator-managed.h | 4 +- .../core/class-instance/class-instance.inl | 2 +- runtime-common/core/core-context.h | 29 + .../core/core-types/comparison_operators.inl | 4 +- .../core/core-types/definition/array.inl | 16 +- .../core/core-types/definition/mixed.inl | 6 +- .../core/core-types/definition/string.inl | 10 +- .../core-types/definition/string_buffer.cpp | 4 +- .../core-types/definition/string_buffer.inl | 12 +- runtime-common/core/runtime-core-context.h | 31 - runtime-common/core/runtime-core.h | 6 +- runtime-common/core/utils/hash.h | 2 + runtime-common/core/utils/migration-php8.cpp | 5 +- .../core/utils/small-object-storage.h | 4 +- runtime-common/runtime-common.cmake | 3 +- runtime-common/stdlib/stdlib.cmake | 3 + runtime-common/stdlib/string/string-context.h | 73 + .../stdlib/string/string-functions.cpp | 2756 ++++++++++++++++ .../stdlib/string/string-functions.h | 527 +++ .../allocator/runtime-light-allocator.cpp | 4 +- runtime-light/component/component.cpp | 2 +- runtime-light/component/component.h | 4 +- runtime-light/component/image.h | 5 +- .../core/globals/php-script-globals.cpp | 4 +- .../core/kphp-core-impl/kphp-core-context.cpp | 8 +- runtime-light/runtime-light.cmake | 5 + runtime-light/stdlib/output/print-functions.h | 8 - runtime-light/stdlib/stdlib.cmake | 1 - runtime-light/stdlib/string/concat.cpp | 59 - runtime-light/stdlib/string/concat.h | 48 - .../stdlib/string/string-context.cpp | 6 + runtime-light/stdlib/string/string-context.h | 12 +- .../stdlib/string/string-functions.h | 112 - runtime-light/tl/tl-types.h | 2 +- runtime/array_functions.cpp | 1 + runtime/confdata-functions.cpp | 3 +- runtime/context/runtime-context.cpp | 26 +- 
runtime/context/runtime-context.h | 13 +- runtime/context/runtime-core-allocator.cpp | 3 +- runtime/context/runtime-core-context.cpp | 24 - runtime/datetime/datetime_functions.cpp | 9 +- runtime/files.cpp | 13 +- runtime/interface.cpp | 24 +- runtime/json-functions.cpp | 4 +- runtime/kphp_tracing.cpp | 6 +- runtime/kphp_tracing_binlog.cpp | 10 +- runtime/mail.cpp | 2 +- runtime/math_functions.cpp | 10 +- runtime/mbstring.cpp | 5 +- runtime/mbstring.h | 22 +- runtime/openssl.cpp | 41 +- runtime/php_assert.cpp | 2 +- runtime/runtime.cmake | 6 +- runtime/streams.cpp | 3 +- runtime/string-context.cpp | 17 + runtime/string_functions.cpp | 2937 +---------------- runtime/string_functions.h | 470 +-- runtime/tcp.cpp | 2 +- runtime/udp.cpp | 3 +- runtime/url.cpp | 11 +- runtime/zlib.cpp | 20 +- runtime/zstd.cpp | 13 +- tests/cpp/runtime/_runtime-tests-env.cpp | 2 +- tests/cpp/runtime/string-test.cpp | 4 +- tests/cpp/runtime/zstd-test.cpp | 10 +- 66 files changed, 3644 insertions(+), 3870 deletions(-) create mode 100644 runtime-common/core/core-context.h delete mode 100644 runtime-common/core/runtime-core-context.h create mode 100644 runtime-common/stdlib/stdlib.cmake create mode 100644 runtime-common/stdlib/string/string-context.h create mode 100644 runtime-common/stdlib/string/string-functions.cpp create mode 100644 runtime-common/stdlib/string/string-functions.h delete mode 100644 runtime-light/stdlib/string/concat.cpp delete mode 100644 runtime-light/stdlib/string/concat.h delete mode 100644 runtime-light/stdlib/string/string-functions.h delete mode 100644 runtime/context/runtime-core-context.cpp create mode 100644 runtime/string-context.cpp diff --git a/runtime-common/core/allocator/runtime-allocator.h b/runtime-common/core/allocator/runtime-allocator.h index 8264a07392..9ec6a13eb0 100644 --- a/runtime-common/core/allocator/runtime-allocator.h +++ b/runtime-common/core/allocator/runtime-allocator.h @@ -5,26 +5,27 @@ #pragma once #include -#include 
"runtime-common/core/memory-resource/unsynchronized_pool_resource.h" +#include "common/mixin/not_copyable.h" +#include "runtime-common/core/memory-resource/unsynchronized_pool_resource.h" -struct RuntimeAllocator { - static RuntimeAllocator& current() noexcept; +struct RuntimeAllocator final : vk::not_copyable { + static RuntimeAllocator &get() noexcept; RuntimeAllocator() = default; RuntimeAllocator(size_t script_mem_size, size_t oom_handling_mem_size); - void init(void * buffer, size_t script_mem_size, size_t oom_handling_mem_size); + void init(void *buffer, size_t script_mem_size, size_t oom_handling_mem_size); void free(); - void * alloc_script_memory(size_t size) noexcept; - void * alloc0_script_memory(size_t size) noexcept; - void * realloc_script_memory(void *mem, size_t new_size, size_t old_size) noexcept; + void *alloc_script_memory(size_t size) noexcept; + void *alloc0_script_memory(size_t size) noexcept; + void *realloc_script_memory(void *mem, size_t new_size, size_t old_size) noexcept; void free_script_memory(void *mem, size_t size) noexcept; - void * alloc_global_memory(size_t size) noexcept; - void * alloc0_global_memory(size_t size) noexcept; - void * realloc_global_memory(void *mem, size_t new_size, size_t old_size) noexcept; + void *alloc_global_memory(size_t size) noexcept; + void *alloc0_global_memory(size_t size) noexcept; + void *realloc_global_memory(void *mem, size_t new_size, size_t old_size) noexcept; void free_global_memory(void *mem, size_t size) noexcept; memory_resource::unsynchronized_pool_resource memory_resource; diff --git a/runtime-common/core/allocator/script-allocator-managed.h b/runtime-common/core/allocator/script-allocator-managed.h index f51f642539..0e34ba85b5 100644 --- a/runtime-common/core/allocator/script-allocator-managed.h +++ b/runtime-common/core/allocator/script-allocator-managed.h @@ -11,7 +11,7 @@ class ScriptAllocatorManaged { public: static void *operator new(size_t size) noexcept { - return 
RuntimeAllocator::current().alloc_script_memory(size); + return RuntimeAllocator::get().alloc_script_memory(size); } static void *operator new(size_t, void *ptr) noexcept { @@ -19,7 +19,7 @@ class ScriptAllocatorManaged { } static void operator delete(void *ptr, size_t size) noexcept { - RuntimeAllocator::current().free_script_memory(ptr, size); + RuntimeAllocator::get().free_script_memory(ptr, size); } static void *operator new[](size_t count) = delete; diff --git a/runtime-common/core/class-instance/class-instance.inl b/runtime-common/core/class-instance/class-instance.inl index a7f572ed5a..02d8bf3a32 100644 --- a/runtime-common/core/class-instance/class-instance.inl +++ b/runtime-common/core/class-instance/class-instance.inl @@ -43,7 +43,7 @@ class_instance class_instance::alloc(Args &&... args) { template inline class_instance class_instance::empty_alloc() { static_assert(std::is_empty{}, "class T must be empty"); - uint32_t obj = ++KphpCoreContext::current().empty_obj_count; + uint32_t obj = ++RuntimeContext::get().empty_obj_count; new (&o) vk::intrusive_ptr(reinterpret_cast(obj)); return *this; } diff --git a/runtime-common/core/core-context.h b/runtime-common/core/core-context.h new file mode 100644 index 0000000000..55dca24249 --- /dev/null +++ b/runtime-common/core/core-context.h @@ -0,0 +1,29 @@ +// Compiler for PHP (aka KPHP) +// Copyright (c) 2024 LLC «V Kontakte» +// Distributed under the GPL v3 License, see LICENSE.notice.txt + +#pragma once + +#include +#include + +#include "common/mixin/not_copyable.h" + +#ifndef INCLUDED_FROM_KPHP_CORE +#error "this file must be included only from runtime-core.h" +#endif + +struct RuntimeContext final : vk::not_copyable { + int32_t show_migration_php8_warning{}; + int32_t php_disable_warnings{}; + uint32_t empty_obj_count{}; + + string_buffer_lib_context sb_lib_context{}; + string_buffer static_SB{}; + string_buffer static_SB_spare{}; + + void init() noexcept; + void free() noexcept; + + static RuntimeContext 
&get() noexcept; +}; diff --git a/runtime-common/core/core-types/comparison_operators.inl b/runtime-common/core/core-types/comparison_operators.inl index f7b6f2eb89..ba5f748646 100644 --- a/runtime-common/core/core-types/comparison_operators.inl +++ b/runtime-common/core/core-types/comparison_operators.inl @@ -96,7 +96,7 @@ inline bool eq2_number_string_as_php8(T lhs, const string &rhs) { inline bool eq2(int64_t lhs, const string &rhs) { const auto php7_result = eq2(lhs, rhs.to_float()); - if (KphpCoreContext::current().show_migration_php8_warning & MIGRATION_PHP8_STRING_COMPARISON_FLAG) { + if (RuntimeContext::get().show_migration_php8_warning & MIGRATION_PHP8_STRING_COMPARISON_FLAG) { const auto php8_result = eq2_number_string_as_php8(lhs, rhs); if (php7_result == php8_result) { return php7_result; @@ -117,7 +117,7 @@ inline bool eq2(const string &lhs, int64_t rhs) { inline bool eq2(double lhs, const string &rhs) { const auto php7_result = lhs == rhs.to_float(); - if (KphpCoreContext::current().show_migration_php8_warning & MIGRATION_PHP8_STRING_COMPARISON_FLAG) { + if (RuntimeContext::get().show_migration_php8_warning & MIGRATION_PHP8_STRING_COMPARISON_FLAG) { const auto php8_result = eq2_number_string_as_php8(lhs, rhs); if (php7_result == php8_result) { return php7_result; diff --git a/runtime-common/core/core-types/definition/array.inl b/runtime-common/core/core-types/definition/array.inl index 55d871568a..7bae3dbc9e 100644 --- a/runtime-common/core/core-types/definition/array.inl +++ b/runtime-common/core/core-types/definition/array.inl @@ -253,7 +253,7 @@ template typename array::array_inner *array::array_inner::create(int64_t new_int_size, bool is_vector) { const size_t mem_size = estimate_size(new_int_size, is_vector); if (is_vector) { - auto p = reinterpret_cast(RuntimeAllocator::current().alloc_script_memory(mem_size)); + auto p = reinterpret_cast(RuntimeAllocator::get().alloc_script_memory(mem_size)); p->is_vector_internal = true; p->ref_cnt = 0; 
p->max_key = -1; @@ -266,7 +266,7 @@ typename array::array_inner *array::array_inner::create(int64_t new_int_si return reinterpret_cast(static_cast(mem) + sizeof(array_inner_fields_for_map)); }; - array_inner *p = shift_pointer_to_array_inner(RuntimeAllocator::current().alloc0_script_memory(mem_size)); + array_inner *p = shift_pointer_to_array_inner(RuntimeAllocator::get().alloc0_script_memory(mem_size)); p->is_vector_internal = false; p->ref_cnt = 0; p->max_key = -1; @@ -291,7 +291,7 @@ void array::array_inner::dispose() { ((T *)entries())[i].~T(); } - RuntimeAllocator::current().free_script_memory((void *)this, sizeof_vector(buf_size)); + RuntimeAllocator::get().free_script_memory((void *)this, sizeof_vector(buf_size)); return; } @@ -304,7 +304,7 @@ void array::array_inner::dispose() { php_assert(this != empty_array()); auto shifted_this = std::launder(reinterpret_cast(this)) - sizeof(array_inner_fields_for_map); - RuntimeAllocator::current().free_script_memory(shifted_this, sizeof_map(buf_size)); + RuntimeAllocator::get().free_script_memory(shifted_this, sizeof_map(buf_size)); } } } @@ -732,7 +732,7 @@ void array::mutate_to_size(int64_t int_size) { php_critical_error ("max array size exceeded: int_size = %" PRIi64, int_size); } const auto new_int_buff_size = static_cast(int_size); - p = static_cast(RuntimeAllocator::current().realloc_script_memory(p, p->sizeof_vector(new_int_buff_size), p->sizeof_vector(p->buf_size))); + p = static_cast(RuntimeAllocator::get().realloc_script_memory(p, p->sizeof_vector(new_int_buff_size), p->sizeof_vector(p->buf_size))); p->buf_size = new_int_buff_size; } @@ -1681,7 +1681,7 @@ array &array::operator+=(const array &other) { p = new_array; } else if (p->buf_size < size + 2) { uint32_t new_size = max(size + 2, p->buf_size * 2); - p = (array_inner *)RuntimeAllocator::current().realloc_script_memory((void *)p, p->sizeof_vector(new_size), p->sizeof_vector(p->buf_size)); + p = (array_inner 
*)RuntimeAllocator::get().realloc_script_memory((void *)p, p->sizeof_vector(new_size), p->sizeof_vector(p->buf_size)); p->buf_size = new_size; } @@ -1925,7 +1925,7 @@ void array::sort(const T1 &compare, bool renumber) { mutate_if_map_shared(); } - array_bucket **arTmp = (array_bucket **)RuntimeAllocator::current().alloc_script_memory(n * sizeof(array_bucket * )); + array_bucket **arTmp = (array_bucket **)RuntimeAllocator::get().alloc_script_memory(n * sizeof(array_bucket * )); uint32_t i = 0; for (array_bucket *it = p->begin(); it != p->end(); it = p->next(it)) { arTmp[i++] = it; @@ -1947,7 +1947,7 @@ void array::sort(const T1 &compare, bool renumber) { arTmp[n - 1]->next = p->get_pointer(p->end()); p->end()->prev = p->get_pointer(arTmp[n - 1]); - RuntimeAllocator::current().free_script_memory(arTmp, n * sizeof(array_bucket * )); + RuntimeAllocator::get().free_script_memory(arTmp, n * sizeof(array_bucket * )); } diff --git a/runtime-common/core/core-types/definition/mixed.inl b/runtime-common/core/core-types/definition/mixed.inl index 055299533a..98adcac744 100644 --- a/runtime-common/core/core-types/definition/mixed.inl +++ b/runtime-common/core/core-types/definition/mixed.inl @@ -278,7 +278,7 @@ bool less_string_number_as_php8_impl(const string &lhs, T rhs) { template bool less_number_string_as_php8(bool php7_result, T lhs, const string &rhs) { - if (KphpCoreContext::current().show_migration_php8_warning & MIGRATION_PHP8_STRING_COMPARISON_FLAG) { + if (RuntimeContext::get().show_migration_php8_warning & MIGRATION_PHP8_STRING_COMPARISON_FLAG) { const auto php8_result = less_number_string_as_php8_impl(lhs, rhs); if (php7_result == php8_result) { return php7_result; @@ -296,7 +296,7 @@ bool less_number_string_as_php8(bool php7_result, T lhs, const string &rhs) { template bool less_string_number_as_php8(bool php7_result, const string &lhs, T rhs) { - if (KphpCoreContext::current().show_migration_php8_warning & MIGRATION_PHP8_STRING_COMPARISON_FLAG) { + if 
(RuntimeContext::get().show_migration_php8_warning & MIGRATION_PHP8_STRING_COMPARISON_FLAG) { const auto php8_result = less_string_number_as_php8_impl(lhs, rhs); if (php7_result == php8_result) { return php7_result; @@ -327,4 +327,4 @@ ResultClass from_mixed(const mixed &m, const string &) noexcept { } else { return ResultClass::create_from_base_raw_ptr(dynamic_cast(m.as_object_ptr())); } -} \ No newline at end of file +} diff --git a/runtime-common/core/core-types/definition/string.inl b/runtime-common/core/core-types/definition/string.inl index 56f57cf586..85ee1be978 100644 --- a/runtime-common/core/core-types/definition/string.inl +++ b/runtime-common/core/core-types/definition/string.inl @@ -57,7 +57,7 @@ string::size_type string::string_inner::new_capacity(size_type requested_capacit string::string_inner *string::string_inner::create(size_type requested_capacity, size_type old_capacity) { size_type capacity = new_capacity(requested_capacity, old_capacity); size_type new_size = (size_type)(sizeof(string_inner) + (capacity + 1)); - string_inner *p = (string_inner *)RuntimeAllocator::current().alloc_script_memory(new_size); + string_inner *p = (string_inner *)RuntimeAllocator::get().alloc_script_memory(new_size); p->capacity = capacity; return p; } @@ -67,7 +67,7 @@ char *string::string_inner::reserve(size_type requested_capacity) { size_type old_size = (size_type)(sizeof(string_inner) + (capacity + 1)); size_type new_size = (size_type)(sizeof(string_inner) + (new_cap + 1)); - string_inner *p = (string_inner *)RuntimeAllocator::current().realloc_script_memory((void *)this, new_size, old_size); + string_inner *p = (string_inner *)RuntimeAllocator::get().realloc_script_memory((void *)this, new_size, old_size); p->capacity = new_cap; return p->ref_data(); } @@ -83,7 +83,7 @@ void string::string_inner::dispose() { } void string::string_inner::destroy() { - RuntimeAllocator::current().free_script_memory(this, get_memory_usage()); + 
RuntimeAllocator::get().free_script_memory(this, get_memory_usage()); } inline string::size_type string::string_inner::get_memory_usage() const { @@ -701,7 +701,7 @@ bool string::try_to_float_as_php7(double *val) const { bool string::try_to_float(double *val, bool php8_warning) const { const bool is_float_php7 = try_to_float_as_php7(val); - if ((KphpCoreContext::current().show_migration_php8_warning & MIGRATION_PHP8_STRING_TO_FLOAT_FLAG) && php8_warning) { + if ((RuntimeContext::get().show_migration_php8_warning & MIGRATION_PHP8_STRING_TO_FLOAT_FLAG) && php8_warning) { const bool is_float_php8 = try_to_float_as_php8(val); if (is_float_php7 != is_float_php8) { @@ -861,7 +861,7 @@ bool string::is_numeric_as_php7() const { bool string::is_numeric() const { const auto php7_result = is_numeric_as_php7(); - if (KphpCoreContext::current().show_migration_php8_warning & MIGRATION_PHP8_STRING_TO_FLOAT_FLAG) { + if (RuntimeContext::get().show_migration_php8_warning & MIGRATION_PHP8_STRING_TO_FLOAT_FLAG) { const bool php8_result = is_numeric_as_php8(); if (php7_result != php8_result) { diff --git a/runtime-common/core/core-types/definition/string_buffer.cpp b/runtime-common/core/core-types/definition/string_buffer.cpp index a25413859d..98d4231a26 100644 --- a/runtime-common/core/core-types/definition/string_buffer.cpp +++ b/runtime-common/core/core-types/definition/string_buffer.cpp @@ -5,11 +5,11 @@ #include "runtime-common/core/runtime-core.h" string_buffer::string_buffer(string::size_type buffer_len) noexcept: - buffer_end(static_cast(RuntimeAllocator::current().alloc_global_memory(buffer_len))), + buffer_end(static_cast(RuntimeAllocator::get().alloc_global_memory(buffer_len))), buffer_begin(buffer_end), buffer_len(buffer_len) { } string_buffer::~string_buffer() noexcept { - RuntimeAllocator::current().free_global_memory(buffer_begin, buffer_len); + RuntimeAllocator::get().free_global_memory(buffer_begin, buffer_len); } diff --git 
a/runtime-common/core/core-types/definition/string_buffer.inl b/runtime-common/core/core-types/definition/string_buffer.inl index 744015dd22..7689e5214c 100644 --- a/runtime-common/core/core-types/definition/string_buffer.inl +++ b/runtime-common/core/core-types/definition/string_buffer.inl @@ -7,7 +7,7 @@ #endif inline void string_buffer::resize(string::size_type new_buffer_len) noexcept { - string_buffer_lib_context &sb_context = KphpCoreContext::current().sb_lib_context; + string_buffer_lib_context &sb_context = RuntimeContext::get().sb_lib_context; if (new_buffer_len < sb_context.MIN_BUFFER_LEN) { new_buffer_len = sb_context.MIN_BUFFER_LEN; } @@ -26,7 +26,7 @@ inline void string_buffer::resize(string::size_type new_buffer_len) noexcept { } string::size_type current_len = size(); - if(void *new_mem = RuntimeAllocator::current().realloc_global_memory(buffer_begin, new_buffer_len, buffer_len)) { + if(void *new_mem = RuntimeAllocator::get().realloc_global_memory(buffer_begin, new_buffer_len, buffer_len)) { buffer_begin = static_cast(new_mem); buffer_len = new_buffer_len; buffer_end = buffer_begin + current_len; @@ -35,7 +35,7 @@ inline void string_buffer::resize(string::size_type new_buffer_len) noexcept { inline void string_buffer::reserve_at_least(string::size_type need) noexcept { string::size_type new_buffer_len = need + size(); - while (unlikely (buffer_len < new_buffer_len && KphpCoreContext::current().sb_lib_context.error_flag != STRING_BUFFER_ERROR_FLAG_FAILED)) { + while (unlikely (buffer_len < new_buffer_len && RuntimeContext::get().sb_lib_context.error_flag != STRING_BUFFER_ERROR_FLAG_FAILED)) { resize(((new_buffer_len * 2 + 1 + 64) | 4095) - 64); } } @@ -72,7 +72,7 @@ string_buffer &operator<<(string_buffer &sb, const string &s) { string::size_type l = s.size(); sb.reserve_at_least(l); - if (unlikely (KphpCoreContext::current().sb_lib_context.error_flag == STRING_BUFFER_ERROR_FLAG_FAILED)) { + if (unlikely 
(RuntimeContext::get().sb_lib_context.error_flag == STRING_BUFFER_ERROR_FLAG_FAILED)) { return sb; } @@ -142,7 +142,7 @@ bool string_buffer::set_pos(int64_t pos) { string_buffer &string_buffer::append(const char *str, size_t len) noexcept { reserve_at_least(static_cast(len)); - if (unlikely (KphpCoreContext::current().sb_lib_context.error_flag == STRING_BUFFER_ERROR_FLAG_FAILED)) { + if (unlikely (RuntimeContext::get().sb_lib_context.error_flag == STRING_BUFFER_ERROR_FLAG_FAILED)) { return *this; } memcpy(buffer_end, str, len); @@ -170,7 +170,7 @@ void string_buffer::reserve(int len) { } inline void init_string_buffer_lib(string::size_type min_length, string::size_type max_length) { - string_buffer_lib_context &sb_context = KphpCoreContext::current().sb_lib_context; + string_buffer_lib_context &sb_context = RuntimeContext::get().sb_lib_context; if (min_length > 0) { sb_context.MIN_BUFFER_LEN = min_length; } diff --git a/runtime-common/core/runtime-core-context.h b/runtime-common/core/runtime-core-context.h deleted file mode 100644 index e9a659f538..0000000000 --- a/runtime-common/core/runtime-core-context.h +++ /dev/null @@ -1,31 +0,0 @@ -// Compiler for PHP (aka KPHP) -// Copyright (c) 2024 LLC «V Kontakte» -// Distributed under the GPL v3 License, see LICENSE.notice.txt - -#pragma once - -#include - -#ifndef INCLUDED_FROM_KPHP_CORE - #error "this file must be included only from runtime-core.h" -#endif - - -struct KphpCoreContext { - /** - * KphpCoreContext is used in - * @see init_php_scripts_once_in_master for runtime or - * @see vk_k2_create_image_state for runtime light - * - * before the init() function is called, so its default parameters should be as follows - **/ - static KphpCoreContext& current() noexcept; - - void init(); - void free(); - - int show_migration_php8_warning = 0; - int php_disable_warnings = 0; - uint32_t empty_obj_count = 0; - string_buffer_lib_context sb_lib_context; -}; diff --git a/runtime-common/core/runtime-core.h 
b/runtime-common/core/runtime-core.h index 39efe42780..c70d1a5845 100644 --- a/runtime-common/core/runtime-core.h +++ b/runtime-common/core/runtime-core.h @@ -33,7 +33,7 @@ #include "runtime-common/core/core-types/decl/string_buffer_decl.inl" #include "runtime-common/core/allocator/runtime-allocator.h" -#include "runtime-common/core/runtime-core-context.h" +#include "runtime-common/core/core-context.h" #include "runtime-common/core/core-types/definition/string.inl" #include "runtime-common/core/core-types/definition/array.inl" @@ -54,8 +54,8 @@ #define SAFE_SET_VALUE(a, b, b_type, c, c_type) ({b_type b_tmp___ = b; c_type c_tmp___ = c; (a).set_value (b_tmp___, c_tmp___);}) #define SAFE_PUSH_BACK(a, b, b_type) ({b_type b_tmp___ = b; a.push_back (b_tmp___);}) #define SAFE_PUSH_BACK_RETURN(a, b, b_type) ({b_type b_tmp___ = b; a.push_back_return (b_tmp___);}) -#define NOERR(a, a_type) ({KphpCoreContext::current().php_disable_warnings++; a_type a_tmp___ = a; KphpCoreContext::current().php_disable_warnings--; a_tmp___;}) -#define NOERR_VOID(a) ({KphpCoreContext::current().php_disable_warnings++; a; KphpCoreContext::current().php_disable_warnings--;}) +#define NOERR(a, a_type) ({RuntimeContext::get().php_disable_warnings++; a_type a_tmp___ = a; RuntimeContext::get().php_disable_warnings--; a_tmp___;}) +#define NOERR_VOID(a) ({RuntimeContext::get().php_disable_warnings++; a; RuntimeContext::get().php_disable_warnings--;}) #define f$likely likely #define f$unlikely unlikely diff --git a/runtime-common/core/utils/hash.h b/runtime-common/core/utils/hash.h index 8df06cee7d..13fdaece98 100644 --- a/runtime-common/core/utils/hash.h +++ b/runtime-common/core/utils/hash.h @@ -5,6 +5,8 @@ #include #include +#pragma once + // from boost // see https://www.boost.org/doc/libs/1_55_0/doc/html/hash/reference.html#boost.hash_combine template diff --git a/runtime-common/core/utils/migration-php8.cpp b/runtime-common/core/utils/migration-php8.cpp index ab3dd76e97..db3a9d0cf6 100644 --- 
a/runtime-common/core/utils/migration-php8.cpp +++ b/runtime-common/core/utils/migration-php8.cpp @@ -3,14 +3,15 @@ // Distributed under the GPL v3 License, see LICENSE.notice.txt #include "runtime-common/core/utils/migration-php8.h" + #include "runtime-common/core/runtime-core.h" void f$set_migration_php8_warning(int mask) { - KphpCoreContext::current().show_migration_php8_warning = mask; + RuntimeContext::get().show_migration_php8_warning = mask; } static void reset_migration_php8_global_vars() { - KphpCoreContext::current().show_migration_php8_warning = 0; + RuntimeContext::get().show_migration_php8_warning = 0; } void free_migration_php8() { diff --git a/runtime-common/core/utils/small-object-storage.h b/runtime-common/core/utils/small-object-storage.h index 9c66214124..0a5b1cdd41 100644 --- a/runtime-common/core/utils/small-object-storage.h +++ b/runtime-common/core/utils/small-object-storage.h @@ -31,7 +31,7 @@ union small_object_storage { template std::enable_if_t < limit emplace(Args &&...args) noexcept { - storage_ptr = RuntimeAllocator::current().alloc_script_memory(sizeof(T)); + storage_ptr = RuntimeAllocator::get().alloc_script_memory(sizeof(T)); return new (storage_ptr) T(std::forward(args)...); } template @@ -42,6 +42,6 @@ union small_object_storage { std::enable_if_t < limit destroy() noexcept { T *mem = get(); mem->~T(); - RuntimeAllocator::current().free_script_memory(mem, sizeof(T)); + RuntimeAllocator::get().free_script_memory(mem, sizeof(T)); } }; diff --git a/runtime-common/runtime-common.cmake b/runtime-common/runtime-common.cmake index 811e39970d..8b46f373f1 100644 --- a/runtime-common/runtime-common.cmake +++ b/runtime-common/runtime-common.cmake @@ -1,6 +1,7 @@ include(${RUNTIME_COMMON_DIR}/core/core.cmake) +include(${RUNTIME_COMMON_DIR}/stdlib/stdlib.cmake) -set(RUNTIME_COMMON_SRC "${CORE_SRC}") +set(RUNTIME_COMMON_SRC "${CORE_SRC}" "${STDLIB_SRC}") if(COMPILE_RUNTIME_LIGHT) set(RUNTIME_COMMON_SOURCES_FOR_COMP "${RUNTIME_COMMON_SRC}") diff 
--git a/runtime-common/stdlib/stdlib.cmake b/runtime-common/stdlib/stdlib.cmake new file mode 100644 index 0000000000..aaa11259a1 --- /dev/null +++ b/runtime-common/stdlib/stdlib.cmake @@ -0,0 +1,3 @@ +prepend(STDLIB_STRING stdlib/string/ string-functions.cpp) + +set(STDLIB_SRC "${STDLIB_STRING}") diff --git a/runtime-common/stdlib/string/string-context.h b/runtime-common/stdlib/string/string-context.h new file mode 100644 index 0000000000..e9aca0fbf3 --- /dev/null +++ b/runtime-common/stdlib/string/string-context.h @@ -0,0 +1,73 @@ +// Compiler for PHP (aka KPHP) +// Copyright (c) 2024 LLC «V Kontakte» +// Distributed under the GPL v3 License, see LICENSE.notice.txt + +#pragma once + +#include +#include +#include + +#include "common/mixin/not_copyable.h" +#include "runtime-common/core/runtime-core.h" + +namespace string_context_impl_ { + +inline constexpr auto *COLON_ = ","; +inline constexpr auto *CP1251_ = "cp1251"; +inline constexpr auto *DOT_ = "."; +inline constexpr auto *COMMA_ = ","; +inline constexpr auto *BACKSLASH_ = "\\"; +inline constexpr auto *QUOTE_ = "\""; +inline constexpr auto *NEWLINE_ = "\n"; +inline constexpr auto *SPACE_ = " "; +inline constexpr auto *WHAT_ = " \n\r\t\v\0"; +inline constexpr auto *ONE_ = "1"; +inline constexpr auto *PERCENT_ = "%"; + +}; // namespace string_context_impl_ + +class StringLibContext final : vk::not_copyable { + static constexpr int32_t MASK_BUFFER_LENGTH = 256; + +public: + static constexpr int32_t STATIC_BUFFER_LENGTH = 1U << 23U; + + std::array static_buf{}; + std::array mask_buffer{}; + + int64_t str_replace_count_dummy{}; + double default_similar_text_percent_stub{}; + + static StringLibContext &get() noexcept; +}; + +struct StringLibConstants final : vk::not_copyable { + const string COLON_STR{string_context_impl_::COLON_, static_cast(std::char_traits::length(string_context_impl_::COLON_))}; + const string CP1251_STR{string_context_impl_::CP1251_, 
static_cast(std::char_traits::length(string_context_impl_::CP1251_))}; + const string DOT_STR{string_context_impl_::DOT_, static_cast(std::char_traits::length(string_context_impl_::DOT_))}; + const string COMMA_STR{string_context_impl_::COMMA_, static_cast(std::char_traits::length(string_context_impl_::COMMA_))}; + const string BACKSLASH_STR{string_context_impl_::BACKSLASH_, + static_cast(std::char_traits::length(string_context_impl_::BACKSLASH_))}; + const string QUOTE_STR{string_context_impl_::QUOTE_, static_cast(std::char_traits::length(string_context_impl_::QUOTE_))}; + const string NEWLINE_STR{string_context_impl_::NEWLINE_, static_cast(std::char_traits::length(string_context_impl_::NEWLINE_))}; + const string SPACE_STR{string_context_impl_::SPACE_, static_cast(std::char_traits::length(string_context_impl_::SPACE_))}; + // +1 here to since char_traits::length doesn't count '\0' at the end + const string WHAT_STR{string_context_impl_::WHAT_, static_cast(std::char_traits::length(string_context_impl_::WHAT_)) + 1}; + const string ONE_STR{string_context_impl_::ONE_, static_cast(std::char_traits::length(string_context_impl_::ONE_))}; + const string PERCENT_STR{string_context_impl_::PERCENT_, static_cast(std::char_traits::length(string_context_impl_::PERCENT_))}; + + const char lhex_digits[17] = "0123456789abcdef"; + const char uhex_digits[17] = "0123456789ABCDEF"; + + static constexpr int64_t ENT_HTML401 = 0; + static constexpr int64_t ENT_COMPAT = 0; + static constexpr int64_t ENT_QUOTES = 1; + static constexpr int64_t ENT_NOQUOTES = 2; + + static constexpr int64_t STR_PAD_LEFT = 0; + static constexpr int64_t STR_PAD_RIGHT = 1; + static constexpr int64_t STR_PAD_BOTH = 2; + + static const StringLibConstants &get() noexcept; +}; diff --git a/runtime-common/stdlib/string/string-functions.cpp b/runtime-common/stdlib/string/string-functions.cpp new file mode 100644 index 0000000000..0c76c9567c --- /dev/null +++ b/runtime-common/stdlib/string/string-functions.cpp @@ 
-0,0 +1,2756 @@ +// Compiler for PHP (aka KPHP) +// Copyright (c) 2024 LLC «V Kontakte» +// Distributed under the GPL v3 License, see LICENSE.notice.txt + +#include "runtime-common/stdlib/string/string-functions.h" + +#include +#include +#include +#include + +#include "common/macos-ports.h" +#include "common/unicode/unicode-utils.h" +#include "common/wrappers/string_view.h" +#include "runtime-common/core/runtime-core.h" +#include "runtime-common/stdlib/string/string-context.h" + +const char *get_mask(const string &what) noexcept { + auto &mask{StringLibContext::get().mask_buffer}; + std::memset(mask.data(), 0, mask.size()); + + int len = what.size(); + for (int i = 0; i < len; i++) { + unsigned char c = what[i]; + if (what[i + 1] == '.' && what[i + 2] == '.' && static_cast(what[i + 3]) >= c) { + std::memset(mask.data() + c, 1, static_cast(what[i + 3]) - c + 1); + i += 3; + } else if (c == '.' && what[i + 1] == '.') { + php_warning("Invalid '..'-range in string \"%s\" at position %d.", what.c_str(), i); + } else { + mask[c] = 1; + } + } + + return mask.data(); +} + +string f$addcslashes(const string &str, const string &what) noexcept { + const char *mask = get_mask(what); + + int len = str.size(); + auto &static_SB = RuntimeContext::get().static_SB; + static_SB.clean().reserve(4 * len); + + for (int i = 0; i < len; i++) { + unsigned char c = str[i]; + if (mask[c]) { + static_SB.append_char('\\'); + if (c < 32 || c > 126) { + switch (c) { + case '\n': + static_SB.append_char('n'); + break; + case '\t': + static_SB.append_char('t'); + break; + case '\r': + static_SB.append_char('r'); + break; + case '\a': + static_SB.append_char('a'); + break; + case '\v': + static_SB.append_char('v'); + break; + case '\b': + static_SB.append_char('b'); + break; + case '\f': + static_SB.append_char('f'); + break; + default: + static_SB.append_char(static_cast((c >> 6) + '0')); + static_SB.append_char(static_cast(((c >> 3) & 7) + '0')); + static_SB.append_char(static_cast((c & 7) + 
'0')); + } + } else { + static_SB.append_char(c); + } + } else { + static_SB.append_char(c); + } + } + return static_SB.str(); +} + +string f$addslashes(const string &str) noexcept { + int len = str.size(); + + auto &static_SB = RuntimeContext::get().static_SB; + static_SB.clean().reserve(2 * len); + for (int i = 0; i < len; i++) { + switch (str[i]) { + case '\0': + static_SB.append_char('\\'); + static_SB.append_char('0'); + break; + case '\'': + case '\"': + case '\\': + static_SB.append_char('\\'); + /* fallthrough */ + default: + static_SB.append_char(str[i]); + } + } + return static_SB.str(); +} + +constexpr unsigned char win_to_koi[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, + 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 123, 124, 125, 126, 127, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 154, 174, 190, 46, 159, 189, 46, 46, + 179, 191, 180, 157, 46, 46, 156, 183, 46, 46, 182, 166, 173, 46, 46, 158, 163, 152, 164, 155, 46, 46, 46, 167, + 225, 226, 247, 231, 228, 229, 246, 250, 233, 234, 235, 236, 237, 238, 239, 240, 242, 243, 244, 245, 230, 232, 227, 254, + 251, 253, 255, 249, 248, 252, 224, 241, 193, 194, 215, 199, 196, 197, 214, 218, 201, 202, 203, 204, 205, 206, 207, 208, + 210, 211, 212, 213, 198, 200, 195, 222, 219, 221, 223, 217, 216, 220, 192, 209}; + +constexpr unsigned char koi_to_win[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 
41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, + 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 123, 124, 125, 126, 127, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 184, 186, 32, 179, 191, + 32, 32, 32, 32, 32, 180, 162, 32, 32, 32, 32, 168, 170, 32, 178, 175, 32, 32, 32, 32, 32, 165, 161, 169, + 254, 224, 225, 246, 228, 229, 244, 227, 245, 232, 233, 234, 235, 236, 237, 238, 239, 255, 240, 241, 242, 243, 230, 226, + 252, 251, 231, 248, 253, 249, 247, 250, 222, 192, 193, 214, 196, 197, 212, 195, 213, 200, 201, 202, 203, 204, 205, 206, + 207, 223, 208, 209, 210, 211, 198, 194, 220, 219, 199, 216, 221, 217, 215, 218}; + +string f$convert_cyr_string(const string &str, const string &from_s, const string &to_s) noexcept { + char from = static_cast(toupper(from_s[0])); + char to = static_cast(toupper(to_s[0])); + + const unsigned char *table = nullptr; + if (from == 'W' && to == 'K') { + table = win_to_koi; + } + if (from == 'K' && to == 'W') { + table = koi_to_win; + } + if (table == nullptr) { + php_critical_error("unsupported conversion from '%c' to '%c' in function convert_cyr_string", from, to); + return str; + } + + int len = str.size(); + string result(len, false); + for (int i = 0; i < len; i++) { + result[i] = table[static_cast(str[i])]; + } + return result; +} + +mixed f$count_chars(const string &str, int64_t mode) noexcept { + int64_t chars[256] = {0}; + + if (static_cast(mode) > 4U) { + php_warning("Unknown mode %" PRIi64, mode); + return false; + } + + const string::size_type len = str.size(); + for (string::size_type i = 0; i < len; i++) { + chars[static_cast(str[i])]++; + } + + if (mode <= 2) { + array 
result; + for (int64_t i = 0; i < 256; i++) { + if ((mode != 2 && chars[i] != 0) || (mode != 1 && chars[i] == 0)) { + result.set_value(i, chars[i]); + } + } + return result; + } + + string result; + for (int i = 0; i < 256; i++) { + if ((mode == 3) == (chars[i] != 0)) { + result.push_back(static_cast(i)); + } + } + return result; +} + +string f$hex2bin(const string &str) noexcept { + int len = str.size(); + if (len & 1) { + php_warning("Wrong argument \"%s\" supplied for function hex2bin", str.c_str()); + return {}; + } + + string result(len / 2, false); + for (int i = 0; i < len; i += 2) { + int num_high = hex_to_int(str[i]); + int num_low = hex_to_int(str[i + 1]); + if (num_high == 16 || num_low == 16) { + php_warning("Wrong argument \"%s\" supplied for function hex2bin", str.c_str()); + return {}; + } + result[i / 2] = static_cast((num_high << 4) + num_low); + } + + return result; +} + +constexpr int entities_size = 251; + +static const char *const ent_to_num_s[entities_size] = + {"AElig", "Aacute", "Acirc", "Agrave", "Alpha", "Aring", "Atilde", "Auml", "Beta", "Ccedil", "Chi", "Dagger", "Delta", "ETH", "Eacute", + "Ecirc", "Egrave", "Epsilon", "Eta", "Euml", "Gamma", "Iacute", "Icirc", "Igrave", "Iota", "Iuml", "Kappa", "Lambda", "Mu", "Ntilde", + "Nu", "OElig", "Oacute", "Ocirc", "Ograve", "Omega", "Omicron", "Oslash", "Otilde", "Ouml", "Phi", "Pi", "Prime", "Psi", "Rho", + "Scaron", "Sigma", "THORN", "Tau", "Theta", "Uacute", "Ucirc", "Ugrave", "Upsilon", "Uuml", "Xi", "Yacute", "Yuml", "Zeta", "aacute", + "acirc", "acute", "aelig", "agrave", "alefsym", "alpha", "amp", "and", "ang", "aring", "asymp", "atilde", "auml", "bdquo", "beta", + "brvbar", "bull", "cap", "ccedil", "cedil", "cent", "chi", "circ", "clubs", "cong", "copy", "crarr", "cup", "curren", "dArr", + "dagger", "darr", "deg", "delta", "diams", "divide", "eacute", "ecirc", "egrave", "empty", "emsp", "ensp", "epsilon", "equiv", "eta", + "eth", "euml", "euro", "exist", "fnof", "forall", "frac12", 
"frac14", "frac34", "frasl", "gamma", "ge", "gt", "hArr", "harr", + "hearts", "hellip", "iacute", "icirc", "iexcl", "igrave", "image", "infin", "int", "iota", "iquest", "isin", "iuml", "kappa", "lArr", + "lambda", "lang", "laquo", "larr", "lceil", "ldquo", "le", "lfloor", "lowast", "loz", "lrm", "lsaquo", "lsquo", "lt", "macr", + "mdash", "micro", "middot", "minus", "mu", "nabla", "nbsp", "ndash", "ne", "ni", "not", "notin", "nsub", "ntilde", "nu", + "oacute", "ocirc", "oelig", "ograve", "oline", "omega", "omicron", "oplus", "or", "ordf", "ordm", "oslash", "otilde", "otimes", "ouml", + "para", "part", "permil", "perp", "phi", "pi", "piv", "plusmn", "pound", "prime", "prod", "prop", "psi", "rArr", "radic", + "rang", "raquo", "rarr", "rceil", "rdquo", "real", "reg", "rfloor", "rho", "rlm", "rsaquo", "rsquo", "sbquo", "scaron", "sdot", + "sect", "shy", "sigma", "sigmaf", "sim", "spades", "sub", "sube", "sum", "sup", "sup1", "sup2", "sup3", "supe", "szlig", + "tau", "there4", "theta", "thetasym", "thinsp", "thorn", "tilde", "times", "trade", "uArr", "uacute", "uarr", "ucirc", "ugrave", "uml", + "upsih", "upsilon", "uuml", "weierp", "xi", "yacute", "yen", "yuml", "zeta", "zwj", "zwnj"}; + +constexpr int32_t ent_to_num_i[entities_size] = {198, 193, 194, 192, 913, 197, 195, 196, 914, 199, 935, 8225, 916, 208, 201, 202, 200, 917, + 919, 203, 915, 205, 206, 204, 921, 207, 922, 923, 924, 209, 925, 338, 211, 212, 210, 937, + 927, 216, 213, 214, 934, 928, 8243, 936, 929, 352, 931, 222, 932, 920, 218, 219, 217, 933, + 220, 926, 221, 376, 918, 225, 226, 180, 230, 224, 8501, 945, 38, 8743, 8736, 229, 8776, 227, + 228, 8222, 946, 166, 8226, 8745, 231, 184, 162, 967, 710, 9827, 8773, 169, 8629, 8746, 164, 8659, + 8224, 8595, 176, 948, 9830, 247, 233, 234, 232, 8709, 8195, 8194, 949, 8801, 951, 240, 235, 8364, + 8707, 402, 8704, 189, 188, 190, 8260, 947, 8805, 62, 8660, 8596, 9829, 8230, 237, 238, 161, 236, + 8465, 8734, 8747, 953, 191, 8712, 239, 954, 8656, 955, 9001, 171, 8592, 
8968, 8220, 8804, 8970, 8727, + 9674, 8206, 8249, 8216, 60, 175, 8212, 181, 183, 8722, 956, 8711, 160, 8211, 8800, 8715, 172, 8713, + 8836, 241, 957, 243, 244, 339, 242, 8254, 969, 959, 8853, 8744, 170, 186, 248, 245, 8855, 246, + 182, 8706, 8240, 8869, 966, 960, 982, 177, 163, 8242, 8719, 8733, 968, 8658, 8730, 9002, 187, 8594, + 8969, 8221, 8476, 174, 8971, 961, 8207, 8250, 8217, 8218, 353, 8901, 167, 173, 963, 962, 8764, 9824, + 8834, 8838, 8721, 8835, 185, 178, 179, 8839, 223, 964, 8756, 952, 977, 8201, 254, 732, 215, 8482, + 8657, 250, 8593, 251, 249, 168, 978, 965, 252, 8472, 958, 253, 165, 255, 950, 8205, 8204}; +/* +static int cp1251_to_utf8[128] = { + 0x402, 0x403, 0x201A, 0x453, 0x201E, 0x2026, 0x2020, 0x2021, 0x20AC, 0x2030, 0x409, 0x2039, 0x40A, 0x40C, 0x40B, 0x40F, + 0x452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x0, 0x2122, 0x459, 0x203A, 0x45A, 0x45C, 0x45B, 0x45F, + 0xA0, 0x40E, 0x45E, 0x408, 0xA4, 0x490, 0xA6, 0xA7, 0x401, 0xA9, 0x404, 0xAB, 0xAC, 0xAD, 0xAE, 0x407, + 0xB0, 0xB1, 0x406, 0x456, 0x491, 0xB5, 0xB6, 0xB7, 0x451, 0x2116, 0x454, 0xBB, 0x458, 0x405, 0x455, 0x457, + 0x410, 0x411, 0x412, 0x413, 0x414, 0x415, 0x416, 0x417, 0x418, 0x419, 0x41A, 0x41B, 0x41C, 0x41D, 0x41E, 0x41F, + 0x420, 0x421, 0x422, 0x423, 0x424, 0x425, 0x426, 0x427, 0x428, 0x429, 0x42A, 0x42B, 0x42C, 0x42D, 0x42E, 0x42F, + 0x430, 0x431, 0x432, 0x433, 0x434, 0x435, 0x436, 0x437, 0x438, 0x439, 0x43A, 0x43B, 0x43C, 0x43D, 0x43E, 0x43F, + 0x440, 0x441, 0x442, 0x443, 0x444, 0x445, 0x446, 0x447, 0x448, 0x449, 0x44A, 0x44B, 0x44C, 0x44D, 0x44E, 0x44F}; +*/ +static const char *const cp1251_to_utf8_str[128] = + {"Ђ", "Ѓ", "‚", "ѓ", "„", "…", "†", "‡", "€", "‰", "Љ", "‹", "Њ", + "Ќ", "Ћ", "Џ", "ђ", "‘", "’", "‛", "“", "•", "–", "—", "", "™", + "љ", "›", "њ", "ќ", "ћ", "џ", " ", "Ў", "ў", "Ј", "¤", "Ґ", "¦", + "§", "Ё", "©", "Є", "«", "¬", "­", "®", "Ї", "°", "±", "І", "і", + "ґ", "µ", "¶", "·", "ё", "№", "є", "»", "ј", "Ѕ", "ѕ", "ї", "А", + "Б", "В", "Г", "Д", "Е", 
"Ж", "З", "И", "Й", "К", "Л", "М", "Н", + "О", "П", "Р", "С", "Т", "У", "Ф", "Х", "Ц", "Ч", "Ш", "Щ", "Ъ", + "Ы", "Ь", "Э", "Ю", "Я", "а", "б", "в", "г", "д", "е", "ж", "з", + "и", "й", "к", "л", "м", "н", "о", "п", "р", "с", "т", "у", "ф", + "х", "ц", "ч", "ш", "щ", "ъ", "ы", "ь", "э", "ю", "я"}; + +string f$htmlentities(const string &str) noexcept { + int len = static_cast(str.size()); + auto &static_SB = RuntimeContext::get().static_SB; + static_SB.clean().reserve(8 * len); + + for (int i = 0; i < len; i++) { + switch (str[i]) { + case '&': + static_SB.append_char('&'); + static_SB.append_char('a'); + static_SB.append_char('m'); + static_SB.append_char('p'); + static_SB.append_char(';'); + break; + case '"': + static_SB.append_char('&'); + static_SB.append_char('q'); + static_SB.append_char('u'); + static_SB.append_char('o'); + static_SB.append_char('t'); + static_SB.append_char(';'); + break; + case '<': + static_SB.append_char('&'); + static_SB.append_char('l'); + static_SB.append_char('t'); + static_SB.append_char(';'); + break; + case '>': + static_SB.append_char('&'); + static_SB.append_char('g'); + static_SB.append_char('t'); + static_SB.append_char(';'); + break; + default: + if (str[i] < 0) { + const char *utf8_str = cp1251_to_utf8_str[128 + str[i]]; + static_SB.append_unsafe(utf8_str, static_cast(strlen(utf8_str))); + } else { + static_SB.append_char(str[i]); + } + } + } + + return static_SB.str(); +} + +string f$html_entity_decode(const string &str, int64_t flags, const string &encoding) noexcept { + if (flags >= 3) { + php_critical_error("unsupported parameter flags = %" PRIi64 " in function html_entity_decode", flags); + } + + bool utf8 = memchr(encoding.c_str(), '8', encoding.size()) != nullptr; + if (!utf8 && strstr(encoding.c_str(), "1251") == nullptr) { + php_critical_error("unsupported encoding \"%s\" in function html_entity_decode", encoding.c_str()); + return str; + } + + int len = str.size(); + string res(len * 7 / 4 + 4, false); + char *p = 
&res[0]; + for (int i = 0; i < len; i++) { + if (str[i] == '&') { + int j = i + 1; + while (j < len && str[j] != ';') { + j++; + } + if (j < len) { + if ((flags & StringLibConstants::ENT_QUOTES) && j == i + 5) { + if (str[i + 1] == '#' && str[i + 2] == '0' && str[i + 3] == '3' && str[i + 4] == '9') { + i += 5; + *p++ = '\''; + continue; + } + } + if (!(flags & StringLibConstants::ENT_NOQUOTES) && j == i + 5) { + if (str[i + 1] == 'q' && str[i + 2] == 'u' && str[i + 3] == 'o' && str[i + 4] == 't') { + i += 5; + *p++ = '\"'; + continue; + } + } + + int l = 0; + int r = entities_size; + while (l + 1 < r) { + int m = (l + r) >> 1; + if (strncmp(str.c_str() + i + 1, ent_to_num_s[m], j - i - 1) < 0) { + r = m; + } else { + l = m; + } + } + if (strncmp(str.c_str() + i + 1, ent_to_num_s[l], j - i - 1) == 0) { + int num = ent_to_num_i[l]; + i = j; + if (utf8) { + if (num < 128) { + *p++ = static_cast(num); + } else if (num < 0x800) { + *p++ = static_cast(0xc0 + (num >> 6)); + *p++ = static_cast(0x80 + (num & 63)); + } else { + *p++ = static_cast(0xe0 + (num >> 12)); + *p++ = static_cast(0x80 + ((num >> 6) & 63)); + *p++ = static_cast(0x80 + (num & 63)); + } + } else { + if (num < 128) { + *p++ = static_cast(num); + } else { + *p++ = '&'; + *p++ = '#'; + if (num >= 1000) { + *p++ = static_cast(num / 1000 % 10 + '0'); + } + *p++ = static_cast(num / 100 % 10 + '0'); + *p++ = static_cast(num / 10 % 10 + '0'); + *p++ = static_cast(num % 10 + '0'); + *p++ = ';'; + } + } + continue; + } + } + } + + *p++ = str[i]; + } + res.shrink(static_cast(p - res.c_str())); + + return res; +} + +string f$htmlspecialchars(const string &str, int64_t flags) noexcept { + if (flags >= 3) { + php_critical_error("unsupported parameter flags = %" PRIi64 " in function htmlspecialchars", flags); + } + + const string::size_type len = str.size(); + auto &static_SB = RuntimeContext::get().static_SB; + static_SB.clean().reserve(6 * len); + + for (string::size_type i = 0; i < len; i++) { + switch (str[i]) { + 
case '&': + static_SB.append_char('&'); + static_SB.append_char('a'); + static_SB.append_char('m'); + static_SB.append_char('p'); + static_SB.append_char(';'); + break; + case '"': + if (!(flags & StringLibConstants::ENT_NOQUOTES)) { + static_SB.append_char('&'); + static_SB.append_char('q'); + static_SB.append_char('u'); + static_SB.append_char('o'); + static_SB.append_char('t'); + static_SB.append_char(';'); + } else { + static_SB.append_char('"'); + } + break; + case '\'': + if (flags & StringLibConstants::ENT_QUOTES) { + static_SB.append_char('&'); + static_SB.append_char('#'); + static_SB.append_char('0'); + static_SB.append_char('3'); + static_SB.append_char('9'); + static_SB.append_char(';'); + } else { + static_SB.append_char('\''); + } + break; + case '<': + static_SB.append_char('&'); + static_SB.append_char('l'); + static_SB.append_char('t'); + static_SB.append_char(';'); + break; + case '>': + static_SB.append_char('&'); + static_SB.append_char('g'); + static_SB.append_char('t'); + static_SB.append_char(';'); + break; + default: + static_SB.append_char(str[i]); + } + } + + return static_SB.str(); +} + +string f$htmlspecialchars_decode(const string &str, int64_t flags) noexcept { + if (flags >= 3) { + php_critical_error("unsupported parameter flags = %" PRIi64 " in function htmlspecialchars_decode", flags); + } + + int len = str.size(); + string res(len, false); + char *p = &res[0]; + for (int i = 0; i < len;) { + if (str[i] == '&') { + if (str[i + 1] == 'a' && str[i + 2] == 'm' && str[i + 3] == 'p' && str[i + 4] == ';') { + *p++ = '&'; + i += 5; + } else if (str[i + 1] == 'q' && str[i + 2] == 'u' && str[i + 3] == 'o' && str[i + 4] == 't' && str[i + 5] == ';' + && !(flags & StringLibConstants::ENT_NOQUOTES)) { + *p++ = '"'; + i += 6; + } else if (str[i + 1] == '#' && str[i + 2] == '0' && str[i + 3] == '3' && str[i + 4] == '9' && str[i + 5] == ';' + && (flags & StringLibConstants::ENT_QUOTES)) { + *p++ = '\''; + i += 6; + } else if (str[i + 1] == 'l' && 
str[i + 2] == 't' && str[i + 3] == ';') { + *p++ = '<'; + i += 4; + } else if (str[i + 1] == 'g' && str[i + 2] == 't' && str[i + 3] == ';') { + *p++ = '>'; + i += 4; + } else { + *p++ = '&'; + i++; + } + } else { + *p++ = str[i]; + i++; + } + } + res.shrink(static_cast(p - res.c_str())); + + return res; +} + +int64_t f$levenshtein(const string &str1, const string &str2) noexcept { + string::size_type len1 = str1.size(); + string::size_type len2 = str2.size(); + + const string::size_type MAX_LEN = 16384; + if (len1 > MAX_LEN || len2 > MAX_LEN) { + php_warning("Too long strings of length %u and %u supplied for function levenshtein. Maximum allowed length is %u.", len1, len2, MAX_LEN); + if (len1 > MAX_LEN) { + len1 = MAX_LEN; + } + if (len2 > MAX_LEN) { + len2 = MAX_LEN; + } + } + + int64_t dp[2][MAX_LEN + 1]; + + for (string::size_type j = 0; j <= len2; j++) { + dp[0][j] = j; + } + + for (string::size_type i = 1; i <= len1; i++) { + dp[i & 1][0] = i; + for (string::size_type j = 1; j <= len2; j++) { + if (str1[i - 1] == str2[j - 1]) { + dp[i & 1][j] = dp[(i - 1) & 1][j - 1]; + } else { + int64_t res = dp[(i - 1) & 1][j - 1]; + if (dp[(i - 1) & 1][j] < res) { + res = dp[(i - 1) & 1][j]; + } + if (dp[i & 1][j - 1] < res) { + res = dp[i & 1][j - 1]; + } + dp[i & 1][j] = res + 1; + } + } + } + return dp[len1 & 1][len2]; +} + +string f$mysql_escape_string(const string &str) noexcept { + int len = str.size(); + auto &static_SB = RuntimeContext::get().static_SB; + static_SB.clean().reserve(2 * len); + for (int i = 0; i < len; i++) { + switch (str[i]) { + case '\0': + case '\n': + case '\r': + case 26: + case '\'': + case '\"': + case '\\': + static_SB.append_char('\\'); + /* fallthrough */ + default: + static_SB.append_char(str[i]); + } + } + return static_SB.str(); +} + +string f$nl2br(const string &str, bool is_xhtml) noexcept { + const char *br = is_xhtml ? "
" : "
"; + int br_len = static_cast(strlen(br)); + + int len = str.size(); + auto &static_SB = RuntimeContext::get().static_SB; + static_SB.clean().reserve((br_len + 1) * len); + + for (int i = 0; i < len;) { + if (str[i] == '\n' || str[i] == '\r') { + static_SB.append_unsafe(br, br_len); + if (str[i] + str[i + 1] == '\n' + '\r') { + static_SB.append_char(str[i++]); + } + } + static_SB.append_char(str[i++]); + } + + return static_SB.str(); +} + +string f$number_format(double number, int64_t decimals, const string &dec_point, const string &thousands_sep) noexcept { + char *result_begin = StringLibContext::get().static_buf.data() + StringLibContext::STATIC_BUFFER_LENGTH; + + if (decimals < 0 || decimals > 100) { + php_warning("Wrong parameter decimals (%" PRIi64 ") in function number_format", decimals); + return {}; + } + bool negative = false; + if (number < 0) { + negative = true; + number *= -1; + } + + double frac = number - floor(number); + number -= frac; + + double mul = pow(10.0, static_cast(decimals)); + frac = round(frac * mul + 1e-9); + + int64_t old_decimals = decimals; + while (result_begin > StringLibContext::get().static_buf.data() && decimals--) { + double x = floor(frac * 0.1 + 0.05); + auto y = static_cast(frac - x * 10 + 0.05); + if (static_cast(y) >= 10U) { + y = 0; + } + frac = x; + + *--result_begin = static_cast(y + '0'); + } + number += frac; + + if (old_decimals > 0) { + string::size_type i = dec_point.size(); + while (result_begin > StringLibContext::get().static_buf.data() && i > 0) { + *--result_begin = dec_point[--i]; + } + } + + int64_t digits = 0; + do { + if (digits && digits % 3 == 0) { + string::size_type i = thousands_sep.size(); + while (result_begin > StringLibContext::get().static_buf.data() && i > 0) { + *--result_begin = thousands_sep[--i]; + } + } + digits++; + + if (result_begin > StringLibContext::get().static_buf.data()) { + double x = floor(number * 0.1 + 0.05); + auto y = static_cast((number - x * 10 + 0.05)); + if 
(static_cast(y) >= 10U) { + y = 0; + } + number = x; + + *--result_begin = static_cast(y + '0'); + } + } while (result_begin > StringLibContext::get().static_buf.data() && number > 0.5); + + if (result_begin > StringLibContext::get().static_buf.data() && negative) { + *--result_begin = '-'; + } + + if (result_begin <= StringLibContext::get().static_buf.data()) { + php_critical_error("maximum length of result (%d) exceeded", StringLibContext::STATIC_BUFFER_LENGTH); + return {}; + } + + return {result_begin, static_cast(StringLibContext::get().static_buf.data() + StringLibContext::STATIC_BUFFER_LENGTH - result_begin)}; +} + +static uint64_t float64_bits(double f) { + uint64_t bits = 0; + std::memcpy(&bits, &f, sizeof(uint64_t)); + return bits; +} + +static double float64_from_bits(uint64_t bits) { + double f = 0; + std::memcpy(&f, &bits, sizeof(uint64_t)); + return f; +} + +string f$pack(const string &pattern, const array &a) noexcept { + auto &static_SB = RuntimeContext::get().static_SB; + static_SB.clean(); + int cur_arg = 0; + for (int i = 0; i < static_cast(pattern.size());) { + if (pattern[i] == '*') { + if (i > 0) { + --i; + } + } + char format = pattern[i++]; + int cnt = 1; + if ('0' <= pattern[i] && pattern[i] <= '9') { + cnt = 0; + do { + cnt = cnt * 10 + pattern[i++] - '0'; + } while ('0' <= pattern[i] && pattern[i] <= '9'); + + if (cnt <= 0) { + php_warning("Wrong count specifier in pattern \"%s\"", pattern.c_str()); + return {}; + } + } else if (pattern[i] == '*') { + cnt = 0; + } + + int arg_num = cur_arg; + if (arg_num >= a.count()) { + if (format == 'A' || format == 'a' || format == 'H' || format == 'h' || cnt != 0) { + php_warning("Not enough parameters to call function pack"); + return {}; + } + if (i + 1 != static_cast(pattern.size())) { + php_warning("Misplaced symbol '*' in pattern \"%s\"", pattern.c_str()); + return {}; + } + break; + } + cur_arg++; + + mixed arg = a.get_value(arg_num); + + if (arg.is_array()) { + php_warning("Argument %d of 
function pack is array", arg_num); + return {}; + } + + char filler = 0; + switch (format) { + case 'A': + filler = ' '; + /* fallthrough */ + case 'a': { + string arg_str = arg.to_string(); + int len = arg_str.size(); + if (!cnt) { + cnt = len; + i++; + } + static_SB.append(arg_str.c_str(), static_cast(min(cnt, len))); + while (cnt > len) { + static_SB << filler; + cnt--; + } + break; + } + case 'h': + case 'H': { + string arg_str = arg.to_string(); + int len = arg_str.size(); + if (!cnt) { + cnt = len; + i++; + } + for (int j = 0; cnt > 0 && j < len; j += 2) { + int num_high = hex_to_int(arg_str[j]); + int num_low = cnt > 1 ? hex_to_int(arg_str[j + 1]) : 0; + cnt -= 2; + if (num_high == 16 || num_low == 16) { + php_warning("Wrong argument \"%s\" supplied for format '%c' in function pack", arg_str.c_str(), format); + return {}; + } + if (format == 'H') { + static_SB << static_cast((num_high << 4) + num_low); + } else { + static_SB << static_cast((num_low << 4) + num_high); + } + } + if (cnt > 0) { + php_warning("Type %c: not enough characters in string \"%s\" in function pack", format, arg_str.c_str()); + } + break; + } + + default: + do { + switch (format) { + case 'c': + case 'C': + static_SB << static_cast(arg.to_int()); + break; + case 's': + case 'S': + case 'v': { + unsigned short value = static_cast(arg.to_int()); + static_SB.append(reinterpret_cast(&value), 2); + break; + } + case 'n': { + unsigned short value = static_cast(arg.to_int()); + static_SB << static_cast(value >> 8) << static_cast(value & 255); + break; + } + case 'i': + case 'I': + case 'l': + case 'L': + case 'V': { + auto value = static_cast(arg.to_int()); + static_SB.append(reinterpret_cast(&value), 4); + break; + } + case 'N': { + auto value = static_cast(arg.to_int()); + static_SB << static_cast(value >> 24) << static_cast((value >> 16) & 255) << static_cast((value >> 8) & 255) + << static_cast(value & 255); + break; + } + case 'f': { + auto value = static_cast(arg.to_float()); + 
static_SB.append(reinterpret_cast(&value), sizeof(float)); + break; + } + case 'e': + case 'E': + case 'd': { + double value = arg.to_float(); + uint64_t value_byteordered = float64_bits(value); + if (format == 'e') { + value_byteordered = htole64(value_byteordered); + } else if (format == 'E') { + value_byteordered = htobe64(value_byteordered); + } + static_SB.append(reinterpret_cast(&value_byteordered), sizeof(uint64_t)); + break; + } + case 'J': + case 'P': + case 'Q': { + // stored in the host machine order by the default (Q flag) + unsigned long long value_byteordered = static_cast(arg.to_string().to_int()); + if (format == 'P') { + // for P encode in little endian order + value_byteordered = htole64(value_byteordered); + } else if (format == 'J') { + // for J encode in big endian order + value_byteordered = htobe64(value_byteordered); + } + + static_SB.append(reinterpret_cast(&value_byteordered), sizeof(unsigned long long)); + break; + } + case 'q': { + int64_t value = arg.to_string().to_int(); + static_SB.append(reinterpret_cast(&value), sizeof(long long)); + break; + } + default: + php_warning("Format code \"%c\" not supported", format); + return {}; + } + + if (cnt > 1) { + arg_num = cur_arg++; + if (arg_num >= a.count()) { + php_warning("Not enough parameters to call function pack"); + return {}; + } + + arg = a.get_value(arg_num); + + if (arg.is_array()) { + php_warning("Argument %d of function pack is array", arg_num); + return {}; + } + } + } while (--cnt > 0); + } + } + + php_assert(cur_arg <= a.count()); + if (cur_arg < a.count()) { + php_warning("Too much arguments to call pack with format \"%s\"", pattern.c_str()); + } + + return static_SB.str(); +} + +string f$ltrim(const string &s, const string &what) noexcept { + const char *mask = get_mask(what); + + int len = static_cast(s.size()); + if (len == 0 || !mask[static_cast(s[0])]) { + return s; + } + + int l = 1; + while (l < len && mask[static_cast(s[l])]) { + l++; + } + return {s.c_str() + l, 
static_cast(len - l)}; +} + +string f$rtrim(const string &s, const string &what) noexcept { + const char *mask = get_mask(what); + + int len = static_cast(s.size()) - 1; + if (len == -1 || !mask[static_cast(s[len])]) { + return s; + } + + while (len > 0 && mask[static_cast(s[len - 1])]) { + len--; + } + + return {s.c_str(), static_cast(len)}; +} + +string f$sprintf(const string &format, const array &a) noexcept { + auto &static_SB = RuntimeContext::get().static_SB; + string result; + result.reserve_at_least(format.size()); + int cur_arg = 0; + bool error_too_big = false; + for (int i = 0; i < static_cast(format.size()); i++) { + if (format[i] != '%') { + result.push_back(format[i]); + continue; + } + i++; + + int parsed_arg_num = 0; + int j = 0; + for (j = i; '0' <= format[j] && format[j] <= '9'; j++) { + parsed_arg_num = parsed_arg_num * 10 + format[j] - '0'; + } + int arg_num = -2; + if (format[j] == '$') { + i = j + 1; + arg_num = parsed_arg_num - 1; + } + + char sign = 0; + if (format[i] == '+') { + sign = format[i++]; + } + + char filler = ' '; + if (format[i] == '0' || format[i] == ' ') { + filler = format[i++]; + } else if (format[i] == '\'') { + i++; + filler = format[i++]; + } + + int pad_right = false; + if (format[i] == '-') { + pad_right = true; + i++; + } + + int width = 0; + while ('0' <= format[i] && format[i] <= '9' && width < StringLibContext::STATIC_BUFFER_LENGTH) { + width = width * 10 + format[i++] - '0'; + } + + if (width >= StringLibContext::STATIC_BUFFER_LENGTH) { + error_too_big = true; + break; + } + + int precision = -1; + if (format[i] == '.' 
&& '0' <= format[i + 1] && format[i + 1] <= '9') { + precision = format[i + 1] - '0'; + i += 2; + while ('0' <= format[i] && format[i] <= '9' && precision < StringLibContext::STATIC_BUFFER_LENGTH) { + precision = precision * 10 + format[i++] - '0'; + } + } + + if (precision >= StringLibContext::STATIC_BUFFER_LENGTH) { + error_too_big = true; + break; + } + + string piece; + if (format[i] == '%') { + piece = StringLibConstants::get().PERCENT_STR; + } else { + if (arg_num == -2) { + arg_num = cur_arg++; + } + + if (arg_num >= a.count()) { + php_warning("Not enough parameters to call function sprintf with format \"%s\"", format.c_str()); + return {}; + } + + if (arg_num == -1) { + php_warning("Wrong parameter number 0 specified in function sprintf with format \"%s\"", format.c_str()); + return {}; + } + + const mixed &arg = a.get_value(arg_num); + + if (arg.is_array()) { + php_warning("Argument %d of function sprintf is array", arg_num); + return {}; + } + + switch (format[i]) { + case 'b': { + auto arg_int = static_cast(arg.to_int()); + int cur_pos = 70; + do { + StringLibContext::get().static_buf[--cur_pos] = static_cast((arg_int & 1) + '0'); + arg_int >>= 1; + } while (arg_int > 0); + piece.assign(StringLibContext::get().static_buf.data() + cur_pos, 70 - cur_pos); + break; + } + case 'c': { + int64_t arg_int = arg.to_int(); + if (arg_int <= -128 || arg_int > 255) { + php_warning("Wrong parameter for specifier %%c in function sprintf with format \"%s\"", format.c_str()); + } + piece.assign(1, static_cast(arg_int)); + break; + } + case 'd': { + int64_t arg_int = arg.to_int(); + if (sign == '+' && arg_int >= 0) { + piece = (static_SB.clean() << "+" << arg_int).str(); + } else { + piece = string(arg_int); + } + break; + } + case 'u': { + auto arg_int = static_cast(arg.to_int()); + int cur_pos = 70; + do { + StringLibContext::get().static_buf[--cur_pos] = static_cast(arg_int % 10 + '0'); + arg_int /= 10; + } while (arg_int > 0); + 
piece.assign(StringLibContext::get().static_buf.data() + cur_pos, 70 - cur_pos); + break; + } + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': { + double arg_float = arg.to_float(); + + static_SB.clean() << '%'; + if (sign) { + static_SB << sign; + } + if (precision >= 0) { + static_SB << '.' << precision; + } + static_SB << format[i]; + + int len = snprintf(StringLibContext::get().static_buf.data(), StringLibContext::STATIC_BUFFER_LENGTH, static_SB.c_str(), arg_float); + if (len >= StringLibContext::STATIC_BUFFER_LENGTH) { + error_too_big = true; + break; + } + + piece.assign(StringLibContext::get().static_buf.data(), len); + break; + } + case 'o': { + auto arg_int = static_cast(arg.to_int()); + int cur_pos = 70; + do { + StringLibContext::get().static_buf[--cur_pos] = static_cast((arg_int & 7) + '0'); + arg_int >>= 3; + } while (arg_int > 0); + piece.assign(StringLibContext::get().static_buf.data() + cur_pos, 70 - cur_pos); + break; + } + case 's': { + string arg_string = arg.to_string(); + + static_SB.clean() << '%'; + if (precision >= 0) { + static_SB << '.' << precision; + } + static_SB << 's'; + + int len = snprintf(StringLibContext::get().static_buf.data(), StringLibContext::STATIC_BUFFER_LENGTH, static_SB.c_str(), arg_string.c_str()); + if (len >= StringLibContext::STATIC_BUFFER_LENGTH) { + error_too_big = true; + break; + } + + piece.assign(StringLibContext::get().static_buf.data(), len); + break; + } + case 'x': + case 'X': { + const char *hex_digits = (format[i] == 'x' ? 
StringLibConstants::get().lhex_digits : StringLibConstants::get().uhex_digits); + auto arg_int = static_cast(arg.to_int()); + + int cur_pos = 70; + do { + StringLibContext::get().static_buf[--cur_pos] = hex_digits[arg_int & 15]; + arg_int >>= 4; + } while (arg_int > 0); + piece.assign(StringLibContext::get().static_buf.data() + cur_pos, 70 - cur_pos); + break; + } + default: + php_warning("Unsupported specifier %%%c in sprintf with format \"%s\"", format[i], format.c_str()); + return {}; + } + } + + result.append(f$str_pad(piece, width, string(1, filler), pad_right)); + } + + if (error_too_big) { + php_warning("Too big result in function sprintf"); + return {}; + } + + return result; +} + +string f$stripcslashes(const string &str) noexcept { + if (str.empty()) { + return str; + } + + // this implementation is an adapted version from php-src + + auto len = str.size(); + auto new_len = len; + string result(len, false); + char *result_c_str = &result[0]; + char num_tmp[4]; // we need up to three digits + a space for null-terminator + int j = 0; + + for (int i = 0; i < len; i++) { + if (str[i] != '\\' || i + 1 >= len) { + *result_c_str++ = str[i]; + } else { + i++; // step over a backslash + switch (str[i]) { + case 'n': + *result_c_str++ = '\n'; + new_len--; + break; + case 'r': + *result_c_str++ = '\r'; + new_len--; + break; + case 'a': + *result_c_str++ = '\a'; + new_len--; + break; + case 't': + *result_c_str++ = '\t'; + new_len--; + break; + case 'v': + *result_c_str++ = '\v'; + new_len--; + break; + case 'b': + *result_c_str++ = '\b'; + new_len--; + break; + case 'f': + *result_c_str++ = '\f'; + new_len--; + break; + case '\\': + *result_c_str++ = '\\'; + new_len--; + break; + case 'x': // \\xN or \\xNN + // collect up to two hex digits and interpret them as char + if (i + 1 < len && isxdigit(static_cast(str[i + 1]))) { + num_tmp[0] = str[++i]; + if (i + 1 < len && isxdigit(static_cast(str[i + 1]))) { + num_tmp[1] = str[++i]; + num_tmp[2] = '\0'; + new_len -= 3; 
+ } else { + num_tmp[1] = '\0'; + new_len -= 2; + } + *result_c_str++ = static_cast(strtol(num_tmp, nullptr, 16)); + } else { + // not a hex literal, just copy a char as i + *result_c_str++ = str[i]; + new_len--; + } + break; + default: // \N \NN \NNN + // collect up to three octal digits and interpret them as char + j = 0; + while (i < len && str[i] >= '0' && str[i] <= '7' && j < 3) { + num_tmp[j++] = str[i++]; + } + if (j) { + num_tmp[j] = '\0'; + *result_c_str++ = static_cast(strtol(num_tmp, nullptr, 8)); + new_len -= j; + i--; + } else { + // not an octal literal, just copy a char as is + *result_c_str++ = str[i]; + new_len--; + } + } + } + } + + if (new_len != 0) { + *result_c_str = '\0'; + } + result.shrink(new_len); + return result; +} + +string f$stripslashes(const string &str) noexcept { + int len = str.size(); + int i = 0; + + string result(len, false); + char *result_c_str = &result[0]; + for (i = 0; i + 1 < len; i++) { + if (str[i] == '\\') { + i++; + if (str[i] == '0') { + *result_c_str++ = '\0'; + continue; + } + } + + *result_c_str++ = str[i]; + } + if (i + 1 == len && str[i] != '\\') { + *result_c_str++ = str[i]; + } + result.shrink(static_cast(result_c_str - result.c_str())); + return result; +} + +Optional f$stripos(const string &haystack, const string &needle, int64_t offset) noexcept { + if (offset < 0) { + php_warning("Wrong offset = %" PRIi64 " in function stripos", offset); + return false; + } + if (offset >= haystack.size()) { + return false; + } + if (needle.size() == 0) { + php_warning("Parameter needle is empty in function stripos"); + return false; + } + + const char *s = strcasestr(haystack.c_str() + offset, needle.c_str()); + if (s == nullptr) { + return false; + } + return s - haystack.c_str(); +} + +static bool php_tag_find(const string &tag, const string &allow) { + if (tag.empty() || allow.empty()) { + return false; + } + + string norm; + int state = 0; + int done = 0; + for (int i = 0; tag[i] && !done; i++) { + char c = 
static_cast(tolower(tag[i])); + switch (c) { + case '<': + norm.push_back(c); + break; + case '>': + done = 1; + break; + default: + if (!isspace(c)) { + // since PHP5.3.4, self-closing tags are interpreted as normal tags, + // so normalized
=
; note that tags from $allow are not normalized + if (c != '/') { + norm.push_back(c); + } + if (state == 0) { + state = 1; + } + } else { + if (state == 1) { + done = 1; + } + } + break; + } + } + norm.push_back('>'); + return memmem(allow.c_str(), allow.size(), norm.c_str(), norm.size()) != nullptr; +} + +string f$strip_tags(const string &str, const string &allow) { + int br = 0; + int depth = 0; + int in_q = 0; + int state = 0; + + const string allow_low = f$strtolower(allow); + auto &static_SB = RuntimeContext::get().static_SB; + auto &static_SB_spare = RuntimeContext::get().static_SB_spare; + static_SB.clean(); + static_SB_spare.clean(); + char lc = 0; + int len = str.size(); + for (int i = 0; i < len; i++) { + char c = str[i]; + switch (c) { + case '\0': + break; + case '<': + if (!in_q) { + if (isspace(str[i + 1])) { + if (state == 0) { + static_SB << c; + } else if (state == 1) { + static_SB_spare << c; + } + } else if (state == 0) { + lc = '<'; + state = 1; + static_SB_spare << '<'; + } else if (state == 1) { + depth++; + } + } + break; + case '(': + if (state == 2) { + if (lc != '"' && lc != '\'') { + lc = '('; + br++; + } + } else if (state == 1) { + static_SB_spare << c; + } else if (state == 0) { + static_SB << c; + } + break; + case ')': + if (state == 2) { + if (lc != '"' && lc != '\'') { + lc = ')'; + br--; + } + } else if (state == 1) { + static_SB_spare << c; + } else if (state == 0) { + static_SB << c; + } + break; + case '>': + if (depth) { + depth--; + break; + } + + if (in_q) { + break; + } + + switch (state) { + case 1: /* HTML/XML */ + lc = '>'; + in_q = state = 0; + static_SB_spare << '>'; + if (php_tag_find(static_SB_spare.str(), allow_low)) { + static_SB << static_SB_spare.c_str(); + } + static_SB_spare.clean(); + break; + case 2: /* PHP */ + if (!br && lc != '\"' && str[i - 1] == '?') { + in_q = state = 0; + static_SB_spare.clean(); + } + break; + case 3: + in_q = state = 0; + static_SB_spare.clean(); + break; + case 4: /* 
JavaScript/CSS/etc... */ + if (i >= 2 && str[i - 1] == '-' && str[i - 2] == '-') { + in_q = state = 0; + static_SB_spare.clean(); + } + break; + default: + static_SB << c; + break; + } + break; + + case '"': + case '\'': + if (state == 4) { + /* Inside */ + break; + } else if (state == 2 && str[i - 1] != '\\') { + if (lc == c) { + lc = 0; + } else if (lc != '\\') { + lc = c; + } + } else if (state == 0) { + static_SB << c; + } else if (state == 1) { + static_SB_spare << c; + } + if (state && i > 0 && (state == 1 || str[i - 1] != '\\') && (!in_q || c == in_q)) { + if (in_q) { + in_q = 0; + } else { + in_q = c; + } + } + break; + case '!': + /* JavaScript & Other HTML scripting languages */ + if (state == 1 && str[i - 1] == '<') { + state = 3; + lc = c; + } else { + if (state == 0) { + static_SB << c; + } else if (state == 1) { + static_SB_spare << c; + } + } + break; + case '-': + if (state == 3 && i >= 2 && str[i - 1] == '-' && str[i - 2] == '!') { + state = 4; + } else { + if (state == 0) { + static_SB << c; + } else if (state == 1) { + static_SB_spare << c; + } + } + break; + case '?': + if (state == 1 && str[i - 1] == '<') { + br = 0; + state = 2; + break; + } + /* fall-through */ + case 'E': + case 'e': + /* !DOCTYPE exception */ + if (state == 3 && i > 6 && tolower(str[i - 1]) == 'p' && tolower(str[i - 2]) == 'y' && tolower(str[i - 3]) == 't' && tolower(str[i - 4]) == 'c' + && tolower(str[i - 5]) == 'o' && tolower(str[i - 6]) == 'd') { + state = 1; + break; + } + /* fall-through */ + case 'l': + case 'L': + /* swm: If we encounter ' 2 && tolower(str[i - 1]) == 'm' && tolower(str[i - 2]) == 'x') { + state = 1; + break; + } + + /* fall-through */ + default: + if (state == 0) { + static_SB << c; + } else if (state == 1) { + static_SB_spare << c; + } + break; + } + } + + return static_SB.str(); +} + +template +string strip_tags_string(const array &list) { + string allow_str; + if (!list.empty()) { + allow_str.reserve_at_least(list.count() * strlen("
")); + for (const auto &it : list) { + const auto &s = it.get_value(); + if (!s.empty()) { + allow_str.push_back('<'); + allow_str.append(f$strval(s)); + allow_str.push_back('>'); + } + } + } + return allow_str; +} + +string f$strip_tags(const string &str, const mixed &allow) { + if (!allow.is_array()) { + return f$strip_tags(str, allow.to_string()); + } + auto allow_list = allow.to_array(); + return f$strip_tags(str, strip_tags_string(allow_list)); +} + +string f$strip_tags(const string &str, const array &allow_list) { + return f$strip_tags(str, strip_tags_string(allow_list)); +} + +Optional f$stristr(const string &haystack, const string &needle, bool before_needle) noexcept { + if (static_cast(needle.size()) == 0) { + php_warning("Parameter needle is empty in function stristr"); + return false; + } + + const char *s = strcasestr(haystack.c_str(), needle.c_str()); + if (s == nullptr) { + return false; + } + + const auto pos = static_cast(s - haystack.c_str()); + if (before_needle) { + return haystack.substr(0, pos); + } + return haystack.substr(pos, haystack.size() - pos); +} + +Optional f$strrchr(const string &haystack, const string &needle) noexcept { + if (needle.empty()) { + php_warning("Parameter needle is empty in function strrchr"); + return false; + } + if (needle.size() > 1) { + php_warning("Parameter needle contains more than one character, only the first is used"); + } + const char needle_char = needle[0]; + for (string::size_type pos = haystack.size(); pos != 0; --pos) { + if (haystack[pos - 1] == needle_char) { + return haystack.substr(pos - 1, haystack.size() - pos + 1); + } + } + return false; +} + +/* + Modified for PHP by Andrei Zmievski + Modified for KPHP by Niyaz Nigmatullin + + compare_right, compare_left and strnatcmp_ex functions + Copyright (C) 2000 by Martin Pool + + This software is provided 'as-is', without any express or implied + warranty. 
In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. +*/ + +static int64_t compare_right(char const **a, char const *aend, char const **b, char const *bend) { + int64_t bias = 0; + + /* The longest run of digits wins. That aside, the greatest + value wins, but we can't know that it will until we've scanned + both numbers to know that they have the same magnitude, so we + remember it in BIAS. */ + for (;; (*a)++, (*b)++) { + if ((*a == aend || !isdigit(static_cast(static_cast(**a)))) + && (*b == bend || !isdigit(static_cast(static_cast(**b))))) { + return bias; + } else if (*a == aend || !isdigit(static_cast(static_cast(**a)))) { + return -1; + } else if (*b == bend || !isdigit(static_cast(static_cast(**b)))) { + return +1; + } else if (**a < **b) { + if (!bias) { + bias = -1; + } + } else if (**a > **b) { + if (!bias) { + bias = +1; + } + } + } + + return 0; +} + +static int64_t compare_left(char const **a, char const *aend, char const **b, char const *bend) { + /* Compare two left-aligned numbers: the first to have a + different value wins. 
*/ + for (;; (*a)++, (*b)++) { + if ((*a == aend || !isdigit(static_cast(static_cast(**a)))) + && (*b == bend || !isdigit(static_cast(static_cast(**b))))) { + return 0; + } else if (*a == aend || !isdigit(static_cast(static_cast(**a)))) { + return -1; + } else if (*b == bend || !isdigit(static_cast(static_cast(**b)))) { + return +1; + } else if (**a < **b) { + return -1; + } else if (**a > **b) { + return +1; + } + } + + return 0; +} + +static int64_t strnatcmp_ex(char const *a, size_t a_len, char const *b, size_t b_len, int64_t fold_case) { + unsigned char ca = 0; + unsigned char cb = 0; + char const *ap = nullptr; + char const *bp = nullptr; + char const *aend = a + a_len; + char const *bend = b + b_len; + bool fractional = false; + int64_t result = 0; + short leading = 1; + + if (a_len == 0 || b_len == 0) { + return (a_len == b_len ? 0 : (a_len > b_len ? 1 : -1)); + } + + ap = a; + bp = b; + while (true) { + ca = *ap; + cb = *bp; + + /* skip over leading zeros */ + while (leading && ca == '0' && (ap + 1 < aend) && isdigit(static_cast(static_cast(*(ap + 1))))) { + ca = *++ap; + } + + while (leading && cb == '0' && (bp + 1 < bend) && isdigit(static_cast(static_cast(*(bp + 1))))) { + cb = *++bp; + } + + leading = 0; + + /* Skip consecutive whitespace */ + while (isspace(static_cast(ca))) { + ca = *++ap; + } + + while (isspace(static_cast(cb))) { + cb = *++bp; + } + + /* process run of digits */ + if (isdigit(static_cast(ca)) && isdigit(static_cast(cb))) { + fractional = (ca == '0' || cb == '0'); + + if (fractional) { + result = compare_left(&ap, aend, &bp, bend); + } else { + result = compare_right(&ap, aend, &bp, bend); + } + + if (result != 0) { + return result; + } + + if (ap == aend && bp == bend) { + /* End of the strings. Let caller sort them out. */ + return 0; + } else { + /* Keep on comparing from the current point. 
*/ + ca = *ap; + cb = *bp; + } + } + + if (fold_case) { + ca = static_cast(toupper(ca)); + cb = static_cast(toupper(cb)); + } + + if (ca < cb) { + return -1; + } else if (ca > cb) { + return +1; + } + + ++ap; + ++bp; + if (ap >= aend && bp >= bend) { + /* The strings compare the same. Perhaps the caller + will want to call strcmp to break the tie. */ + return 0; + } else if (ap >= aend) { + return -1; + } else if (bp >= bend) { + return 1; + } + } +} + +int64_t f$strcmp(const string &lhs, const string &rhs) noexcept { + return lhs.compare(rhs); +} + +int64_t f$strnatcmp(const string &lhs, const string &rhs) noexcept { + return strnatcmp_ex(lhs.c_str(), lhs.size(), rhs.c_str(), rhs.size(), 0); +} + +Optional f$strpos(const string &haystack, const string &needle, int64_t offset) noexcept { + if (offset < 0) { + php_warning("Wrong offset = %" PRIi64 " in function strpos", offset); + return false; + } + if (offset > int64_t{haystack.size()}) { + return false; + } + if (needle.size() <= 1) { + if (needle.size() == 0) { + php_warning("Parameter needle is empty in function strpos"); + return false; + } + + const char *s = static_cast(memchr(haystack.c_str() + offset, needle[0], haystack.size() - offset)); + if (s == nullptr) { + return false; + } + return s - haystack.c_str(); + } + + const char *s = static_cast(memmem(haystack.c_str() + offset, haystack.size() - offset, needle.c_str(), needle.size())); + if (s == nullptr) { + return false; + } + return s - haystack.c_str(); +} + +Optional f$strrpos(const string &haystack, const string &needle, int64_t offset) noexcept { + const char *end = haystack.c_str() + haystack.size(); + if (offset < 0) { + offset += haystack.size() + 1; + if (offset < 0) { + return false; + } + + end = haystack.c_str() + offset; + offset = 0; + } + if (offset >= haystack.size()) { + return false; + } + if (needle.size() == 0) { + php_warning("Parameter needle is empty in function strrpos"); + return false; + } + + const char *s = 
static_cast(memmem(haystack.c_str() + offset, haystack.size() - offset, needle.c_str(), needle.size())); + const char *t = nullptr; + if (s == nullptr || s >= end) { + return false; + } + while ((t = static_cast(memmem(s + 1, haystack.c_str() + haystack.size() - s - 1, needle.c_str(), needle.size()))) != nullptr && t < end) { + s = t; + } + return s - haystack.c_str(); +} + +Optional f$strripos(const string &haystack, const string &needle, int64_t offset) noexcept { + const char *end = haystack.c_str() + haystack.size(); + if (offset < 0) { + offset += haystack.size() + 1; + if (offset < 0) { + return false; + } + + end = haystack.c_str() + offset; + offset = 0; + } + if (offset >= haystack.size()) { + return false; + } + if (needle.size() == 0) { + php_warning("Parameter needle is empty in function strripos"); + return false; + } + + const char *s = strcasestr(haystack.c_str() + offset, needle.c_str()); + const char *t = nullptr; + if (s == nullptr || s >= end) { + return false; + } + while ((t = strcasestr(s + 1, needle.c_str())) != nullptr && t < end) { + s = t; + } + return s - haystack.c_str(); +} + +Optional f$strstr(const string &haystack, const string &needle, bool before_needle) noexcept { + if (static_cast(needle.size()) == 0) { + php_warning("Parameter needle is empty in function strstr"); + return false; + } + + const char *s = static_cast(memmem(haystack.c_str(), haystack.size(), needle.c_str(), needle.size())); + if (s == nullptr) { + return false; + } + + const auto pos = static_cast(s - haystack.c_str()); + if (before_needle) { + return haystack.substr(0, pos); + } + return haystack.substr(pos, haystack.size() - pos); +} + +string f$strtolower(const string &str) noexcept { + int n = str.size(); + + // if there is no upper case char inside the string, we can + // return the argument unchanged, avoiding the allocation and data copying; + // while at it, memorize the first upper case char, so we can + // use memcpy to copy everything before that pos; + 
// note: do not use islower() here, the compiler does not inline that function call; + // it could be beneficial to use 256-byte LUT here, but SIMD approach could be even better + const char *end = str.c_str() + n; + const char *uppercase_pos = std::find_if(str.c_str(), end, [](unsigned char ch) { return ch >= 'A' && ch <= 'Z'; }); + if (uppercase_pos == end) { + return str; + } + + string res(n, false); + int64_t lowercase_prefix = uppercase_pos - str.c_str(); + if (lowercase_prefix != 0) { // avoid unnecessary function call + std::memcpy(res.buffer(), str.c_str(), lowercase_prefix); + } + for (int i = lowercase_prefix; i < n; i++) { + res[i] = static_cast(std::tolower(static_cast(str[i]))); + } + + return res; +} + +string f$strtoupper(const string &str) noexcept { + int n = str.size(); + + // same optimization as in strtolower + const char *end = str.c_str() + n; + const char *lowercase_pos = std::find_if(str.c_str(), end, [](unsigned char ch) { return ch >= 'a' && ch <= 'z'; }); + if (lowercase_pos == end) { + return str; + } + + string res(n, false); + int64_t uppercase_prefix = lowercase_pos - str.c_str(); + if (uppercase_prefix != 0) { // avoid unnecessary function call + std::memcpy(res.buffer(), str.c_str(), uppercase_prefix); + } + for (int i = uppercase_prefix; i < n; i++) { + res[i] = static_cast(std::toupper(static_cast(str[i]))); + } + + return res; +} + +string f$strtr(const string &subject, const string &from, const string &to) noexcept { + int n = subject.size(); + string result(n, false); + for (int i = 0; i < n; i++) { + const char *p = static_cast( + memchr(static_cast(from.c_str()), static_cast(static_cast(subject[i])), static_cast(from.size()))); + if (p == nullptr || static_cast(p - from.c_str()) >= to.size()) { + result[i] = subject[i]; + } else { + result[i] = to[static_cast(p - from.c_str())]; + } + } + return result; +} + +string f$str_pad(const string &input, int64_t len, const string &pad_str, int64_t pad_type) noexcept { + 
string::size_type old_len = input.size(); + if (len <= old_len) { + return input; + } + if (len > string::max_size()) { + php_critical_error("tried to allocate too big string of size %" PRIi64, len); + } + + const auto strlen = static_cast(len); + + string::size_type pad_left = 0; + string::size_type pad_right = 0; + if (pad_type == StringLibConstants::STR_PAD_RIGHT) { + pad_right = strlen - old_len; + } else if (pad_type == StringLibConstants::STR_PAD_LEFT) { + pad_left = strlen - old_len; + } else if (pad_type == StringLibConstants::STR_PAD_BOTH) { + pad_left = (strlen - old_len) / 2; + pad_right = (strlen - old_len + 1) / 2; + } else { + php_warning("Wrong parameter pad_type in function str_pad"); + return input; + } + + string::size_type pad_len = pad_str.size(); + if (pad_len == 0) { + php_warning("Wrong parameter pad_str (empty string) in function str_pad"); + return input; + } + + string res(strlen, false); + for (string::size_type i = 0; i < pad_left; i++) { + res[i] = pad_str[i % pad_len]; + } + memcpy(&res[pad_left], input.c_str(), old_len); + for (string::size_type i = 0; i < pad_right; i++) { + res[i + pad_left + old_len] = pad_str[i % pad_len]; + } + + return res; +} + +string f$str_repeat(const string &s, int64_t multiplier) noexcept { + const string::size_type len = s.size(); + if (multiplier <= 0 || len == 0) { + return {}; + } + + auto mult = static_cast(multiplier); + if (string::max_size() / len < mult) { + php_critical_error("tried to allocate too big string of size %" PRIi64, multiplier * len); + } + + if (len == 1) { + return {mult, s[0]}; + } + + string result(mult * len, false); + if (len >= 5) { + while (mult--) { + memcpy(&result[mult * len], s.c_str(), len); + } + } else { + for (string::size_type i = 0; i < mult; i++) { + for (string::size_type j = 0; j < len; j++) { + result[i * len + j] = s[j]; + } + } + } + return result; +} + +static string str_replace_char(char c, const string &replace, const string &subject, int64_t &replace_count, 
bool with_case) { + int count = 0; + const char *piece = subject.c_str(); + const char *piece_end = subject.c_str() + subject.size(); + + string result; + if (!replace.empty()) { + result.reserve_at_least(subject.size()); + } + + while (true) { + const char *pos = nullptr; + if (with_case) { + pos = static_cast(memchr(piece, c, piece_end - piece)); + } else { + const char needle[] = {c, '\0'}; + pos = strcasestr(piece, needle); + } + + if (pos == nullptr) { + if (count == 0) { + return subject; + } + replace_count += count; + result.append(piece, static_cast(piece_end - piece)); + return result; + } + + ++count; + + result.append(piece, static_cast(pos - piece)); + result.append(replace); + + piece = pos + 1; + } + php_assert(0); // unreachable + return {}; +} + +static const char *find_substr(const char *where, const char *where_end, const string &what, bool with_case) { + if (with_case) { + return static_cast(memmem(where, where_end - where, what.c_str(), what.size())); + } + + return strcasestr(where, what.c_str()); +} + +void str_replace_inplace(const string &search, const string &replace, string &subject, int64_t &replace_count, bool with_case) noexcept { + if (search.empty()) { + php_warning("Parameter search is empty in function str_replace"); + return; + } + + subject.make_not_shared(); + + int count = 0; + const char *piece = subject.c_str(); + const char *piece_end = subject.c_str() + subject.size(); + + char *output = subject.buffer(); + bool length_no_change = search.size() == replace.size(); + + while (true) { + const char *pos = find_substr(piece, piece_end, search, with_case); + if (pos == nullptr) { + if (count == 0) { + return; + } + replace_count += count; + if (!length_no_change) { + memmove(output, piece, piece_end - piece); + } + output += piece_end - piece; + if (!length_no_change) { + subject.shrink(static_cast(output - subject.c_str())); + } + return; + } + + ++count; + + if (!length_no_change) { + memmove(output, piece, pos - piece); + } + 
output += pos - piece; + memcpy(output, replace.c_str(), replace.size()); + output += replace.size(); + + piece = pos + search.size(); + } + php_assert(0); // unreachable +} + +string str_replace(const string &search, const string &replace, const string &subject, int64_t &replace_count, bool with_case) noexcept { + if (search.empty()) { + php_warning("Parameter search is empty in function str_replace"); + return subject; + } + + int count = 0; + const char *piece = subject.c_str(); + const char *piece_end = subject.c_str() + subject.size(); + + string result; + while (true) { + const char *pos = find_substr(piece, piece_end, search, with_case); + if (pos == nullptr) { + if (count == 0) { + return subject; + } + replace_count += count; + result.append(piece, static_cast(piece_end - piece)); + return result; + } + + ++count; + + result.append(piece, static_cast(pos - piece)); + result.append(replace); + + piece = pos + search.size(); + } + php_assert(0); // unreachable + return {}; +} + +// common for f$str_replace(string) and f$str_ireplace(string) +string str_replace_gen(const string &search, const string &replace, const string &subject, int64_t &replace_count, bool with_case); + +string str_replace_string(const mixed &search, const mixed &replace, const string &subject, int64_t &replace_count, bool with_case) { + if (search.is_array() && replace.is_array()) { + return str_replace_string_array(search.as_array(""), replace.as_array(""), subject, replace_count, with_case); + } else if (search.is_array()) { + string result = subject; + const string &replace_value = replace.to_string(); + + for (array::const_iterator it = search.begin(); it != search.end(); ++it) { + const string &search_string = f$strval(it.get_value()); + if (search_string.size() >= replace_value.size()) { + str_replace_inplace(search_string, replace_value, result, replace_count, with_case); + } else { + result = str_replace(search_string, replace_value, result, replace_count, with_case); + } + } + 
return result; + } else { + if (replace.is_array()) { + php_warning("Parameter mismatch, search is a string while replace is an array"); + // return false; + } + + return str_replace_gen(f$strval(search), f$strval(replace), subject, replace_count, with_case); + } +} + +// common for f$str_replace(string) and f$str_ireplace(string) +string str_replace_gen(const string &search, const string &replace, const string &subject, int64_t &replace_count, bool with_case) { + replace_count = 0; + if (search.size() == 1) { + return str_replace_char(search[0], replace, subject, replace_count, with_case); + } else { + return str_replace(search, replace, subject, replace_count, with_case); + } +} + +string f$str_replace(const string &search, const string &replace, const string &subject, int64_t &replace_count) noexcept { + return str_replace_gen(search, replace, subject, replace_count, true); +} + +string f$str_ireplace(const string &search, const string &replace, const string &subject, int64_t &replace_count) noexcept { + return str_replace_gen(search, replace, subject, replace_count, false); +} + +string f$str_replace(const mixed &search, const mixed &replace, const string &subject, int64_t &replace_count) noexcept { + return str_replace_string(search, replace, subject, replace_count, true); +} + +string f$str_ireplace(const mixed &search, const mixed &replace, const string &subject, int64_t &replace_count) noexcept { + return str_replace_string(search, replace, subject, replace_count, false); +} + +// common for f$str_replace(mixed) and f$str_ireplace(mixed) +mixed str_replace_gen(const mixed &search, const mixed &replace, const mixed &subject, int64_t &replace_count, bool with_case) { + replace_count = 0; + if (subject.is_array()) { + array result; + for (array::const_iterator it = subject.begin(); it != subject.end(); ++it) { + mixed cur_result = str_replace_string(search, replace, it.get_value().to_string(), replace_count, with_case); + if (!cur_result.is_null()) { + 
result.set_value(it.get_key(), cur_result); + } + } + return result; + } else { + return str_replace_string(search, replace, subject.to_string(), replace_count, with_case); + } +} + +mixed f$str_replace(const mixed &search, const mixed &replace, const mixed &subject, int64_t &replace_count) noexcept { + return str_replace_gen(search, replace, subject, replace_count, true); +} + +mixed f$str_ireplace(const mixed &search, const mixed &replace, const mixed &subject, int64_t &replace_count) noexcept { + return str_replace_gen(search, replace, subject, replace_count, false); +} + +array f$str_split(const string &str, int64_t split_length) noexcept { + if (split_length <= 0) { + php_warning("Wrong parameter split_length = %" PRIi64 " in function str_split", split_length); + array result(array_size(1, true)); + result.set_value(0, str); + return result; + } + + array result(array_size((str.size() + split_length - 1) / split_length, true)); + string::size_type i = 0; + for (i = 0; i + split_length <= str.size(); i += static_cast(split_length)) { + result.push_back(str.substr(i, static_cast(split_length))); + } + if (i < str.size()) { + result.push_back(str.substr(i, str.size() - i)); + } + return result; +} + +int64_t f$substr_count(const string &haystack, const string &needle, int64_t offset, int64_t length) noexcept { + offset = haystack.get_correct_offset(offset); + if (offset >= haystack.size()) { + return 0; + } + if (length > haystack.size() - offset) { + length = haystack.size() - offset; + } + + int64_t ans = 0; + const char *s = haystack.c_str() + offset; + const char *end = haystack.c_str() + offset + length; + if (needle.empty()) { + php_warning("Needle is empty in function substr_count"); + return end - s; + } + do { + s = static_cast( + memmem(static_cast(s), static_cast(end - s), static_cast(needle.c_str()), static_cast(needle.size()))); + if (s == nullptr) { + return ans; + } + ans++; + s += needle.size(); + } while (true); +} + +string 
f$substr_replace(const string &str, const string &replacement, int64_t start, int64_t length) noexcept { + int64_t str_len = str.size(); + + // if $start is negative, count $start from the end of the string + start = str.get_correct_offset_clamped(start); + + // if $length is negative, set it to the length needed + // needed to stop that many chars from the end of the string + if (length < 0) { + length = (str_len - start) + length; + if (length < 0) { + length = 0; + } + } + + if (length > str_len) { + length = str_len; + } + if ((start + length) > str_len) { + length = str_len - start; + } + + auto result = str.substr(0, static_cast(start)); + result.append(replacement); + const auto offset = static_cast(start + length); + result.append(str.substr(offset, str.size() - offset)); + return result; +} + +Optional f$substr_compare(const string &main_str, const string &str, int64_t offset, int64_t length, bool case_insensitivity) noexcept { + int64_t str_len = main_str.size(); + + if (length < 0) { + php_warning("The length must be greater than or equal to zero in substr_compare function call"); + return false; + } + + offset = main_str.get_correct_offset(offset); + + // > and >= signs depend on version of PHP7.2 and could vary unpredictably. 
We put `>` sign which corresponds to behaviour of PHP7.2.22 + if (offset > str_len) { + php_warning("The start position cannot exceed initial string length in substr_compare function call"); + return false; + } + + if (case_insensitivity) { + return strncasecmp(main_str.c_str() + offset, str.c_str(), length); + } else { + return strncmp(main_str.c_str() + offset, str.c_str(), length); + } +} + +tmp_string trim_impl(const char *s, string::size_type s_len, const string &what) { + const char *mask = get_mask(what); + + int len = s_len; + if (len == 0 || (!mask[static_cast(s[len - 1])] && !mask[static_cast(s[0])])) { + return {s, s_len}; + } + + while (len > 0 && mask[static_cast(s[len - 1])]) { + len--; + } + + if (len == 0) { + return {}; + } + + int l = 0; + while (mask[static_cast(s[l])]) { + l++; + } + return {s + l, static_cast(len - l)}; +} + +tmp_string f$_tmp_trim(tmp_string s, const string &what) noexcept { + return trim_impl(s.data, s.size, what); +} + +tmp_string f$_tmp_trim(const string &s, const string &what) noexcept { + return trim_impl(s.c_str(), s.size(), what); +} + +string f$trim(tmp_string s, const string &what) noexcept { + return materialize_tmp_string(trim_impl(s.data, s.size, what)); +} + +string f$trim(const string &s, const string &what) noexcept { + tmp_string result = trim_impl(s.c_str(), s.size(), what); + if (result.data == s.c_str() && result.size == s.size()) { + return s; + } + return materialize_tmp_string(result); +} + +string f$ucwords(const string &str) noexcept { + int n = str.size(); + + bool in_word = false; + string res(n, false); + for (int i = 0; i < n; i++) { + int cur = str[i] & 0xdf; + if ('A' <= cur && cur <= 'Z') { + if (in_word) { + res[i] = str[i]; + } else { + res[i] = static_cast(cur); + in_word = true; + } + } else { + res[i] = str[i]; + in_word = false; + } + } + + return res; +} + +Optional> f$unpack(const string &pattern, const string &data) noexcept { + array result; + + int data_len = data.size(); + int 
data_pos = 0; + for (int i = 0; i < static_cast(pattern.size());) { + char format = pattern[i++]; + int cnt = -1; + if ('0' <= pattern[i] && pattern[i] <= '9') { + cnt = 0; + do { + cnt = cnt * 10 + pattern[i++] - '0'; + } while ('0' <= pattern[i] && pattern[i] <= '9'); + + if (cnt <= 0) { + php_warning("Wrong count specifier in pattern \"%s\"", pattern.c_str()); + return false; + } + } else if (pattern[i] == '*') { + cnt = 0; + i++; + } + if (data_pos >= data_len) { + if (format == 'A' || format == 'a' || format == 'H' || format == 'h' || cnt != 0) { + php_warning("Not enough data to unpack with format \"%s\"", pattern.c_str()); + return false; + } + return result; + } + + const char *key_end = strchrnul(&pattern[i], '/'); + string key_prefix(pattern.c_str() + i, static_cast(key_end - pattern.c_str() - i)); + i = static_cast(key_end - pattern.c_str()); + if (i < static_cast(pattern.size())) { + i++; + } + + if (cnt == 0 && i != static_cast(pattern.size())) { + php_warning("Misplaced symbol '*' in pattern \"%s\"", pattern.c_str()); + return false; + } + + char filler = 0; + switch (format) { + case 'A': + filler = ' '; + /* fallthrough */ + case 'a': { + if (cnt == 0) { + cnt = data_len - data_pos; + } else if (cnt == -1) { + cnt = 1; + } + int read_len = cnt; + if (read_len + data_pos > data_len) { + php_warning("Not enough data to unpack with format \"%s\"", pattern.c_str()); + return false; + } + while (cnt > 0 && data[data_pos + cnt - 1] == filler) { + cnt--; + } + + if (key_prefix.empty()) { + key_prefix = StringLibConstants::get().ONE_STR; + } + + result.set_value(key_prefix, string(data.c_str() + data_pos, cnt)); + + data_pos += read_len; + break; + } + case 'h': + case 'H': { + if (cnt == 0) { + cnt = (data_len - data_pos) * 2; + } else if (cnt == -1) { + cnt = 1; + } + + int read_len = (cnt + 1) / 2; + if (read_len + data_pos > data_len) { + php_warning("Not enough data to unpack with format \"%s\"", pattern.c_str()); + return false; + } + + string 
value(cnt, false); + for (int j = data_pos; cnt > 0; j++, cnt -= 2) { + unsigned char ch = data[j]; + char num_high = StringLibConstants::get().lhex_digits[ch >> 4]; + char num_low = StringLibConstants::get().lhex_digits[ch & 15]; + if (format == 'h') { + swap(num_high, num_low); + } + + value[(j - data_pos) * 2] = num_high; + if (cnt > 1) { + value[(j - data_pos) * 2 + 1] = num_low; + } + } + php_assert(cnt == 0 || cnt == -1); + + if (key_prefix.empty()) { + key_prefix = StringLibConstants::get().ONE_STR; + } + + result.set_value(key_prefix, value); + + data_pos += read_len; + break; + } + + default: { + if (key_prefix.empty() && cnt == -1) { + key_prefix = StringLibConstants::get().ONE_STR; + } + int counter = 1; + do { + mixed value; + int value_int = 0; + if (data_pos >= data_len) { + php_warning("Not enough data to unpack with format \"%s\"", pattern.c_str()); + return false; + } + + switch (format) { + case 'c': + case 'C': + value_int = static_cast(data[data_pos++]); + if (format != 'c' && value_int < 0) { + value_int += 256; + } + value = value_int; + break; + case 's': + case 'S': + case 'v': + value_int = static_cast(data[data_pos]); + if (data_pos + 1 < data_len) { + value_int |= data[data_pos + 1] << 8; + } + data_pos += 2; + if (format != 's' && value_int < 0) { + value_int += 65536; + } + value = value_int; + break; + case 'n': + value_int = static_cast(data[data_pos]) << 8; + if (data_pos + 1 < data_len) { + value_int |= static_cast(data[data_pos + 1]); + } + data_pos += 2; + value = value_int; + break; + case 'i': + case 'I': + case 'l': + case 'L': + case 'V': + value_int = static_cast(data[data_pos]); + if (data_pos + 1 < data_len) { + value_int |= static_cast(data[data_pos + 1]) << 8; + if (data_pos + 2 < data_len) { + value_int |= static_cast(data[data_pos + 2]) << 16; + if (data_pos + 3 < data_len) { + value_int |= data[data_pos + 3] << 24; + } + } + } + data_pos += 4; + value = value_int; + break; + case 'N': + value_int = 
static_cast(data[data_pos]) << 24; + if (data_pos + 1 < data_len) { + value_int |= static_cast(data[data_pos + 1]) << 16; + if (data_pos + 2 < data_len) { + value_int |= static_cast(data[data_pos + 2]) << 8; + if (data_pos + 3 < data_len) { + value_int |= static_cast(data[data_pos + 3]); + } + } + } + data_pos += 4; + value = value_int; + break; + case 'f': { + if (data_pos + static_cast(sizeof(float)) > data_len) { + php_warning("Not enough data to unpack with format \"%s\"", pattern.c_str()); + return false; + } + value = static_cast(*(float *)(data.c_str() + data_pos)); + data_pos += static_cast(sizeof(float)); + break; + } + case 'e': + case 'E': + case 'd': { + if (data_pos + static_cast(sizeof(double)) > data_len) { + php_warning("Not enough data to unpack with format \"%s\"", pattern.c_str()); + return false; + } + uint64_t value_byteordered = 0; + memcpy(&value_byteordered, data.c_str() + data_pos, sizeof(double)); + if (format == 'e') { + value_byteordered = le64toh(value_byteordered); + } else if (format == 'E') { + value_byteordered = be64toh(value_byteordered); + } + value = float64_from_bits(value_byteordered); + data_pos += static_cast(sizeof(double)); + break; + } + case 'J': + case 'P': + case 'Q': { + if (data_pos + static_cast(sizeof(unsigned long long)) > data_len) { + php_warning("Not enough data to unpack with format \"%s\"", pattern.c_str()); + return false; + } + + // stored in the host machine order by the default (Q flag) + unsigned long long value_byteordered = 0; + memcpy(&value_byteordered, data.c_str() + data_pos, sizeof(value_byteordered)); + if (format == 'P') { + // for P encode in little endian order + value_byteordered = le64toh(value_byteordered); + } else if (format == 'J') { + // for J encode in big endian order + value_byteordered = be64toh(value_byteordered); + } + + const size_t buf_size = 20; + char buf[buf_size]; + value = string{buf, static_cast(simd_uint64_to_string(value_byteordered, buf) - buf)}; + data_pos += 
static_cast(sizeof(unsigned long long)); + break; + } + case 'q': { + if (data_pos + static_cast(sizeof(long long)) > data_len) { + php_warning("Not enough data to unpack with format \"%s\"", pattern.c_str()); + return false; + } + long long value_ll = *reinterpret_cast(data.c_str() + data_pos); + value = f$strval(static_cast(value_ll)); + data_pos += static_cast(sizeof(long long)); + break; + } + default: + php_warning("Format code \"%c\" not supported", format); + return false; + } + + string key = key_prefix; + if (cnt != -1) { + key.append(string(counter++)); + } + + result.set_value(key, value); + + if (cnt == 0) { + if (data_pos >= data_len) { + return result; + } + } + } while (cnt == 0 || --cnt > 0); + } + } + } + return result; +} + +string f$wordwrap(const string &str, int64_t width, const string &brk, bool cut) noexcept { + if (width <= 0) { + php_warning("Wrong parameter width = %" PRIi64 " in function wordwrap", width); + return str; + } + + string result; + string::size_type first = 0; + const string::size_type n = str.size(); + int64_t last_space = -1; + for (string::size_type i = 0; i < n; i++) { + if (str[i] == ' ') { + last_space = i; + } + if (i >= first + width && (cut || last_space > first)) { + if (last_space <= first) { + result.append(str, first, i - first); + first = i; + } else { + result.append(str, first, static_cast(last_space) - first); + first = static_cast(last_space + 1); + } + result.append(brk); + } + } + result.append(str, first, str.size() - first); + return result; +} + +string f$xor_strings(const string &s, const string &t) noexcept { + string::size_type length = min(s.size(), t.size()); + string result{length, false}; + const char *s_str = s.c_str(); + const char *t_str = t.c_str(); + char *res_str = result.buffer(); + for (string::size_type i = 0; i < length; i++) { + *res_str = *s_str ^ *t_str; + ++s_str; + ++t_str; + ++res_str; + } + return result; +} + +namespace impl_ { +// Based on the original PHP implementation +// 
https://github.com/php/php-src/blob/e8678fcb42c5cb1ea38ff9c6819baca74c2bb5ea/ext/standard/string.c#L3375-L3418 +inline size_t php_similar_str(vk::string_view first, vk::string_view second, size_t &pos1, size_t &pos2, size_t &count) { + size_t max = 0; + count = 0; + for (const char *p = first.begin(); p != first.end(); ++p) { + for (const char *q = second.begin(); q != second.end(); ++q) { + size_t l = 0; + for (; (p + l < first.end()) && (q + l < second.end()) && (p[l] == q[l]); ++l) { + } + if (l > max) { + max = l; + ++count; + pos1 = p - first.begin(); + pos2 = q - second.begin(); + } + } + } + return max; +} + +size_t php_similar_char(vk::string_view first, vk::string_view second) { + size_t pos1 = 0; + size_t pos2 = 0; + size_t count = 0; + + const size_t max = php_similar_str(first, second, pos1, pos2, count); + size_t sum = max; + if (sum) { + if (pos1 && pos2 && count > 1) { + sum += php_similar_char(first.substr(0, pos1), second.substr(0, pos2)); + } + pos1 += max; + pos2 += max; + if (pos1 < first.size() && pos2 < second.size()) { + sum += php_similar_char(first.substr(pos1), second.substr(pos2)); + } + } + return sum; +} + +} // namespace impl_ + +int64_t f$similar_text(const string &first, const string &second, double &percent) noexcept { + if (first.empty() && second.empty()) { + percent = 0.0; + return 0; + } + const size_t sim = impl_::php_similar_char(vk::string_view{first.c_str(), first.size()}, vk::string_view{second.c_str(), second.size()}); + percent = static_cast(sim) * 200.0 / (first.size() + second.size()); + return static_cast(sim); +} + +string str_concat(const string &s1, const string &s2) noexcept { + // for 2 argument concatenation it's not so uncommon to have at least one empty string argument; + // it happens in cases like `$prefix . 
$s` where $prefix could be empty depending on some condition + // real-world applications analysis shows that ~17.6% of all two arguments concatenations have + // at least one empty string argument + // + // checking both lengths for 0 is almost free, but when we step into those 17.6%, we get almost x10 + // faster concatenation and no heap allocations + // + // this idea is borrowed from the Go runtime + if (s1.empty()) { + return s2; + } + if (s2.empty()) { + return s1; + } + auto new_size = s1.size() + s2.size(); + return string(new_size, true).append_unsafe(s1).append_unsafe(s2).finish_append(); +} + +string str_concat(str_concat_arg s1, str_concat_arg s2) noexcept { + auto new_size = s1.size + s2.size; + return string(new_size, true).append_unsafe(s1.as_tmp_string()).append_unsafe(s2.as_tmp_string()).finish_append(); +} + +string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3) noexcept { + auto new_size = s1.size + s2.size + s3.size; + return string(new_size, true).append_unsafe(s1.as_tmp_string()).append_unsafe(s2.as_tmp_string()).append_unsafe(s3.as_tmp_string()).finish_append(); +} + +string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3, str_concat_arg s4) noexcept { + auto new_size = s1.size + s2.size + s3.size + s4.size; + return string(new_size, true) + .append_unsafe(s1.as_tmp_string()) + .append_unsafe(s2.as_tmp_string()) + .append_unsafe(s3.as_tmp_string()) + .append_unsafe(s4.as_tmp_string()) + .finish_append(); +} + +string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3, str_concat_arg s4, str_concat_arg s5) noexcept { + auto new_size = s1.size + s2.size + s3.size + s4.size + s5.size; + return string(new_size, true) + .append_unsafe(s1.as_tmp_string()) + .append_unsafe(s2.as_tmp_string()) + .append_unsafe(s3.as_tmp_string()) + .append_unsafe(s4.as_tmp_string()) + .append_unsafe(s5.as_tmp_string()) + .finish_append(); +} diff --git a/runtime-common/stdlib/string/string-functions.h 
b/runtime-common/stdlib/string/string-functions.h new file mode 100644 index 0000000000..18ede0d20f --- /dev/null +++ b/runtime-common/stdlib/string/string-functions.h @@ -0,0 +1,527 @@ +// Compiler for PHP (aka KPHP) +// Copyright (c) 2024 LLC «V Kontakte» +// Distributed under the GPL v3 License, see LICENSE.notice.txt + +#pragma once + +#include +#include +#include + +#include "runtime-common/core/runtime-core.h" +#include "runtime-common/core/utils/kphp-assert-core.h" +#include "runtime-common/stdlib/string/string-context.h" + +string f$addcslashes(const string &str, const string &what) noexcept; + +string f$addslashes(const string &str) noexcept; + +string f$hex2bin(const string &str) noexcept; + +inline string f$bin2hex(const string &str) noexcept { + int len = str.size(); + string result(2 * len, false); + + for (int i = 0; i < len; i++) { + result[2 * i] = StringLibConstants::get().lhex_digits[(str[i] >> 4) & 15]; + result[2 * i + 1] = StringLibConstants::get().lhex_digits[str[i] & 15]; + } + + return result; +} + +string f$convert_cyr_string(const string &str, const string &from_s, const string &to_s) noexcept; + +inline string f$chr(int64_t v) noexcept { + return {1, static_cast(v)}; +} + +inline int64_t f$ord(const string &s) noexcept { + return static_cast(s[0]); +} + +mixed f$count_chars(const string &str, int64_t mode = 0) noexcept; + +string f$htmlentities(const string &str) noexcept; + +string f$html_entity_decode(const string &str, int64_t flags = StringLibConstants::ENT_COMPAT | StringLibConstants::ENT_HTML401, + const string &encoding = StringLibConstants::get().CP1251_STR) noexcept; + +string f$htmlspecialchars(const string &str, int64_t flags = StringLibConstants::ENT_COMPAT | StringLibConstants::ENT_HTML401) noexcept; + +string f$htmlspecialchars_decode(const string &str, int64_t flags = StringLibConstants::ENT_COMPAT | StringLibConstants::ENT_HTML401) noexcept; + +inline string f$lcfirst(const string &str) noexcept { + int n = str.size(); + 
if (n == 0) { + return str; + } + + string res(n, false); + res[0] = static_cast(tolower(str[0])); + memcpy(&res[1], &str[1], n - 1); + + return res; +} + +int64_t f$levenshtein(const string &str1, const string &str2) noexcept; + +string f$ltrim(const string &s, const string &what = StringLibConstants::get().WHAT_STR) noexcept; + +string f$mysql_escape_string(const string &str) noexcept; + +string f$nl2br(const string &str, bool is_xhtml = true) noexcept; + +string f$number_format(double number, int64_t decimals, const string &dec_point, const string &thousands_sep) noexcept; + +string f$pack(const string &pattern, const array &a) noexcept; + +string f$rtrim(const string &s, const string &what = StringLibConstants::get().WHAT_STR) noexcept; + +inline string f$chop(const string &s, const string &what = StringLibConstants::get().WHAT_STR) noexcept { + return f$rtrim(s, what); +} + +string f$sprintf(const string &format, const array &a) noexcept; + +string f$stripcslashes(const string &str) noexcept; + +string f$stripslashes(const string &str) noexcept; + +inline int64_t f$strcasecmp(const string &lhs, const string &rhs) noexcept { + int n = min(lhs.size(), rhs.size()); + for (int i = 0; i < n; i++) { + if (tolower(lhs[i]) != tolower(rhs[i])) { + return tolower(lhs[i]) - tolower(rhs[i]); + } + } + // TODO: for PHP8.2, use <=> operator instead: + // return spaceship(static_cast(lhs.size()), static_cast(rhs.size())); + return static_cast(lhs.size()) - static_cast(rhs.size()); +} + +int64_t f$strcmp(const string &lhs, const string &rhs) noexcept; + +string f$strip_tags(const string &str, const string &allow = string{}); + +inline string f$strip_tags(const string &str, const array &allow_list) noexcept { + php_assert(allow_list.empty()); + return f$strip_tags(str, string()); +} + +string f$strip_tags(const string &str, const mixed &allow); + +string f$strip_tags(const string &str, const array &allow_list); + +Optional f$stripos(const string &haystack, const string 
&needle, int64_t offset = 0) noexcept; + +inline Optional f$stripos(const string &haystack, const mixed &needle, int64_t offset = 0) noexcept { + if (needle.is_string()) { + return f$stripos(haystack, needle.to_string(), offset); + } else { + return f$stripos(haystack, string(1, static_cast(needle.to_int())), offset); + } +} + +Optional f$stristr(const string &haystack, const string &needle, bool before_needle = false) noexcept; + +Optional f$strrchr(const string &haystack, const string &needle) noexcept; + +inline int64_t f$strncmp(const string &lhs, const string &rhs, int64_t len) noexcept { + if (len < 0) { + return 0; + } + return std::memcmp(lhs.c_str(), rhs.c_str(), min(int64_t{min(lhs.size(), rhs.size())} + 1, len)); +} + +int64_t f$strnatcmp(const string &lhs, const string &rhs) noexcept; + +inline int64_t f$strspn(const string &hayshack, const string &char_list, int64_t offset = 0) noexcept { + return std::strspn(hayshack.c_str() + hayshack.get_correct_offset_clamped(offset), char_list.c_str()); +} + +inline int64_t f$strcspn(const string &hayshack, const string &char_list, int64_t offset = 0) noexcept { + return std::strcspn(hayshack.c_str() + hayshack.get_correct_offset_clamped(offset), char_list.c_str()); +} + +inline Optional f$strpbrk(const string &haystack, const string &char_list) noexcept { + const char *pos = std::strpbrk(haystack.c_str(), char_list.c_str()); + if (pos == nullptr) { + return false; + } + + return string(pos, static_cast(haystack.size() - (pos - haystack.c_str()))); +} + +Optional f$strpos(const string &haystack, const string &needle, int64_t offset = 0) noexcept; + +inline Optional f$strpos(const string &haystack, const mixed &needle, int64_t offset = 0) noexcept { + if (needle.is_string()) { + return f$strpos(haystack, needle.to_string(), offset); + } else { + return f$strpos(haystack, string(1, static_cast(needle.to_int())), offset); + } +} + +template +Optional f$strpos(const string &haystack, const Optional &needle, int64_t 
offset = 0) noexcept { + return f$strpos(haystack, needle.val(), offset); +} + +Optional f$strrpos(const string &haystack, const string &needle, int64_t offset = 0) noexcept; + +Optional f$strripos(const string &haystack, const string &needle, int64_t offset = 0) noexcept; + +inline string f$strrev(const string &str) noexcept { + int n = str.size(); + + string res(n, false); + for (int i = 0; i < n; i++) { + res[n - i - 1] = str[i]; + } + + return res; +} + +Optional f$strstr(const string &haystack, const string &needle, bool before_needle = false) noexcept; + +string f$strtolower(const string &str) noexcept; + +string f$strtoupper(const string &str) noexcept; + +string f$strtr(const string &subject, const string &from, const string &to) noexcept; + +string f$str_pad(const string &input, int64_t len, const string &pad_str = StringLibConstants::get().SPACE_STR, + int64_t pad_type = StringLibConstants::STR_PAD_RIGHT) noexcept; + +string f$str_repeat(const string &s, int64_t multiplier) noexcept; + +string f$str_replace(const string &search, const string &replace, const string &subject, + int64_t &replace_count = StringLibContext::get().str_replace_count_dummy) noexcept; + +string f$str_ireplace(const string &search, const string &replace, const string &subject, + int64_t &replace_count = StringLibContext::get().str_replace_count_dummy) noexcept; + +void str_replace_inplace(const string &search, const string &replace, string &subject, int64_t &replace_count, bool with_case) noexcept; + +string str_replace(const string &search, const string &replace, const string &subject, int64_t &replace_count, bool with_case) noexcept; + +template +string str_replace_string_array(const array &search, const array &replace, const string &subject, int64_t &replace_count, bool with_case) noexcept { + string result = subject; + string replace_value; + typename array::const_iterator cur_replace_val = replace.begin(); + + for (typename array::const_iterator it = search.begin(); it != 
search.end(); ++it) { + if (cur_replace_val != replace.end()) { + replace_value = f$strval(cur_replace_val.get_value()); + ++cur_replace_val; + } else { + replace_value = string{}; + } + + const string &search_string = f$strval(it.get_value()); + if (search_string.size() >= replace_value.size()) { + str_replace_inplace(search_string, replace_value, result, replace_count, with_case); + } else { + result = str_replace(search_string, replace_value, result, replace_count, with_case); + } + } + + return result; +}; + +template +string f$str_replace(const array &search, const array &replace, const string &subject, + int64_t &replace_count = StringLibContext::get().str_replace_count_dummy) noexcept { + replace_count = 0; + return str_replace_string_array(search, replace, subject, replace_count, true); +} + +template +string f$str_ireplace(const array &search, const array &replace, const string &subject, + int64_t &replace_count = StringLibContext::get().str_replace_count_dummy) noexcept { + replace_count = 0; + return str_replace_string_array(search, replace, subject, replace_count, false); +} + +string f$str_replace(const mixed &search, const mixed &replace, const string &subject, + int64_t &replace_count = StringLibContext::get().str_replace_count_dummy) noexcept; + +string f$str_ireplace(const mixed &search, const mixed &replace, const string &subject, + int64_t &replace_count = StringLibContext::get().str_replace_count_dummy) noexcept; + +template> +SubjectT f$str_replace(const T1 &search, const T2 &replace, const SubjectT &subject, + int64_t &replace_count = StringLibContext::get().str_replace_count_dummy) noexcept { + return f$str_replace(search, replace, subject.val(), replace_count); +} + +template> +SubjectT f$str_ireplace(const T1 &search, const T2 &replace, const SubjectT &subject, + int64_t &replace_count = StringLibContext::get().str_replace_count_dummy) noexcept { + return f$str_ireplace(search, replace, subject.val(), replace_count); +} + +mixed 
f$str_replace(const mixed &search, const mixed &replace, const mixed &subject, + int64_t &replace_count = StringLibContext::get().str_replace_count_dummy) noexcept; + +mixed f$str_ireplace(const mixed &search, const mixed &replace, const mixed &subject, + int64_t &replace_count = StringLibContext::get().str_replace_count_dummy) noexcept; + +array f$str_split(const string &str, int64_t split_length = 1) noexcept; + +inline Optional f$substr(const string &str, int64_t start, int64_t length = std::numeric_limits::max()) noexcept { + if (!wrap_substr_args(str.size(), start, length)) { + return false; + } + return str.substr(static_cast(start), static_cast(length)); +} + +inline Optional f$substr(tmp_string str, int64_t start, int64_t length = std::numeric_limits::max()) noexcept { + if (!wrap_substr_args(str.size, start, length)) { + return false; + } + return string(str.data + start, length); +} + +inline tmp_string f$_tmp_substr(const string &str, int64_t start, int64_t length = std::numeric_limits::max()) noexcept { + if (!wrap_substr_args(str.size(), start, length)) { + return {}; + } + return {str.c_str() + start, static_cast(length)}; +} + +inline tmp_string f$_tmp_substr(tmp_string str, int64_t start, int64_t length = std::numeric_limits::max()) noexcept { + if (!wrap_substr_args(str.size, start, length)) { + return {}; + } + return {str.data + start, static_cast(length)}; +} + +int64_t f$substr_count(const string &haystack, const string &needle, int64_t offset = 0, int64_t length = std::numeric_limits::max()) noexcept; + +string f$substr_replace(const string &str, const string &replacement, int64_t start, int64_t length = std::numeric_limits::max()) noexcept; + +Optional f$substr_compare(const string &main_str, const string &str, int64_t offset, int64_t length = std::numeric_limits::max(), + bool case_insensitivity = false) noexcept; + +inline bool f$str_starts_with(const string &haystack, const string &needle) noexcept { + return haystack.starts_with(needle); 
+} + +inline bool f$str_ends_with(const string &haystack, const string &needle) noexcept { + return haystack.ends_with(needle); +} + +tmp_string f$_tmp_trim(tmp_string s, const string &what = StringLibConstants::get().WHAT_STR) noexcept; + +tmp_string f$_tmp_trim(const string &s, const string &what = StringLibConstants::get().WHAT_STR) noexcept; + +string f$trim(tmp_string s, const string &what = StringLibConstants::get().WHAT_STR) noexcept; + +string f$trim(const string &s, const string &what = StringLibConstants::get().WHAT_STR) noexcept; + +inline string f$ucfirst(const string &str) noexcept { + int n = str.size(); + if (n == 0) { + return str; + } + + string res(n, false); + res[0] = static_cast(toupper(str[0])); + memcpy(&res[1], &str[1], n - 1); + + return res; +} + +string f$ucwords(const string &str) noexcept; + +Optional> f$unpack(const string &pattern, const string &data) noexcept; + +inline string f$vsprintf(const string &format, const array &args) noexcept { + return f$sprintf(format, args); +} + +string f$wordwrap(const string &str, int64_t width = 75, const string &brk = StringLibConstants::get().NEWLINE_STR, bool cut = false) noexcept; + +namespace hex2char_impl_ { + +struct Hex2CharMapMaker { +private: + static constexpr uint8_t hex2int_char(size_t c) noexcept { + return ('0' <= c && c <= '9') ? static_cast(c - '0') + : ('a' <= c && c <= 'f') ? static_cast(c - 'a' + 10) + : ('A' <= c && c <= 'F') ? 
static_cast(c - 'A' + 10) + : 16; + } + +public: + template + static constexpr auto make(std::index_sequence /*unused*/) noexcept { + return std::array{{ + hex2int_char(Ints)..., + }}; + } +}; + +}; // namespace hex2char_impl_ + +inline uint8_t hex_to_int(char c) noexcept { + static constexpr auto hex_int_map = hex2char_impl_::Hex2CharMapMaker::make(std::make_index_sequence<256>()); + return hex_int_map[static_cast(c)]; +} + +inline string f$number_format(double number, int64_t decimals = 0) noexcept { + return f$number_format(number, decimals, StringLibConstants::get().DOT_STR, StringLibConstants::get().COLON_STR); +} + +inline string f$number_format(double number, int64_t decimals, const string &dec_point) noexcept { + return f$number_format(number, decimals, dec_point, StringLibConstants::get().COLON_STR); +} + +inline string f$number_format(double number, int64_t decimals, const mixed &dec_point) noexcept { + return f$number_format(number, decimals, dec_point.is_null() ? StringLibConstants::get().DOT_STR : dec_point.to_string(), + StringLibConstants::get().COLON_STR); +} + +inline string f$number_format(double number, int64_t decimals, const string &dec_point, const mixed &thousands_sep) noexcept { + return f$number_format(number, decimals, dec_point, thousands_sep.is_null() ? StringLibConstants::get().COLON_STR : thousands_sep.to_string()); +} + +inline string f$number_format(double number, int64_t decimals, const mixed &dec_point, const string &thousands_sep) noexcept { + return f$number_format(number, decimals, dec_point.is_null() ? StringLibConstants::get().DOT_STR : dec_point.to_string(), thousands_sep); +} + +inline string f$number_format(double number, int64_t decimals, const mixed &dec_point, const mixed &thousands_sep) noexcept { + return f$number_format(number, decimals, dec_point.is_null() ? StringLibConstants::get().DOT_STR : dec_point.to_string(), + thousands_sep.is_null() ? 
StringLibConstants::get().COLON_STR : thousands_sep.to_string()); +} + +inline int64_t f$strlen(const string &s) noexcept { + return s.size(); +} + +inline Optional f$stristr(const string &haystack, const mixed &needle, bool before_needle = false) noexcept { + if (needle.is_string()) { + return f$stristr(haystack, needle.to_string(), before_needle); + } else { + return f$stristr(haystack, string(1, static_cast(needle.to_int())), before_needle); + } +} + +inline Optional f$strrpos(const string &haystack, const mixed &needle, int64_t offset = 0) noexcept { + if (needle.is_string()) { + return f$strrpos(haystack, needle.to_string(), offset); + } else { + return f$strrpos(haystack, string(1, static_cast(needle.to_int())), offset); + } +} + +inline Optional f$strripos(const string &haystack, const mixed &needle, int64_t offset) noexcept { + if (needle.is_string()) { + return f$strripos(haystack, needle.to_string(), offset); + } else { + return f$strripos(haystack, string(1, static_cast(needle.to_int())), offset); + } +} + +inline Optional f$strstr(const string &haystack, const mixed &needle, bool before_needle = false) noexcept { + if (needle.is_string()) { + return f$strstr(haystack, needle.to_string(), before_needle); + } else { + return f$strstr(haystack, string(1, static_cast(needle.to_int())), before_needle); + } +} + +template +string f$strtr(const string &subject, const array &replace_pairs) noexcept { + const char *piece = subject.c_str(); + const char *piece_end = subject.c_str() + subject.size(); + string result; + while (true) { + const char *best_pos = nullptr; + int64_t best_len = -1; + string replace; + for (typename array::const_iterator p = replace_pairs.begin(); p != replace_pairs.end(); ++p) { + const string search = f$strval(p.get_key()); + int64_t search_len = search.size(); + if (search_len == 0) { + return subject; + } + const char *pos = static_cast(memmem(static_cast(piece), static_cast(piece_end - piece), + static_cast(search.c_str()), 
static_cast(search_len))); + if (pos != nullptr && (best_pos == nullptr || best_pos > pos || (best_pos == pos && search_len > best_len))) { + best_pos = pos; + best_len = search_len; + replace = f$strval(p.get_value()); + } + } + if (best_pos == nullptr) { + result.append(piece, static_cast(piece_end - piece)); + break; + } + + result.append(piece, static_cast(best_pos - piece)); + result.append(replace); + + piece = best_pos + best_len; + } + + return result; +} + +inline string f$strtr(const string &subject, const mixed &from, const mixed &to) noexcept { + return f$strtr(subject, from.to_string(), to.to_string()); +} + +inline string f$strtr(const string &subject, const mixed &replace_pairs) noexcept { + return f$strtr(subject, replace_pairs.as_array("strtr")); +} + +string f$xor_strings(const string &s, const string &t) noexcept; + +int64_t f$similar_text(const string &first, const string &second, double &percent = StringLibContext::get().default_similar_text_percent_stub) noexcept; + +// similar_text ( string $first , string $second [, float &$percent ] ) : int + +// str_concat_arg generalizes both tmp_string and string arguments; +// it can be constructed from both of them, so concat functions can operate +// on both tmp_string and string types +// there is a special (string, string) overloading for concat2 to +// allow the empty string result optimization to kick in +struct str_concat_arg { + const char *data; + string::size_type size; + + str_concat_arg(const string &s) noexcept + : data{s.c_str()} + , size{s.size()} {} + str_concat_arg(tmp_string s) noexcept + : data{s.data} + , size{s.size} {} + + tmp_string as_tmp_string() const noexcept { + return {data, size}; + } +}; + +// str_concat functions implement efficient string-typed `.` (concatenation) operator implementation; +// apart from being machine-code size efficient (a function call is more compact), they're also +// usually faster as runtime is compiled with -O3 which is almost never the case for 
translated C++ code +// (it's either -O2 or -Os most of the time) +// +// we choose to have 4 functions (up to 5 arguments) because of the frequency distribution: +// 37619: 2 args +// 20616: 3 args +// 4534: 5 args +// 3791: 4 args +// 935: 7 args +// 565: 6 args +// 350: 9 args +// Both 6 and 7 argument combination already look infrequent enough to not bother +string str_concat(const string &s1, const string &s2) noexcept; +string str_concat(str_concat_arg s1, str_concat_arg s2) noexcept; +string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3) noexcept; +string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3, str_concat_arg s4) noexcept; +string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3, str_concat_arg s4, str_concat_arg s5) noexcept; diff --git a/runtime-light/allocator/runtime-light-allocator.cpp b/runtime-light/allocator/runtime-light-allocator.cpp index e9a8596fb4..6002b78ca2 100644 --- a/runtime-light/allocator/runtime-light-allocator.cpp +++ b/runtime-light/allocator/runtime-light-allocator.cpp @@ -18,7 +18,7 @@ bool is_script_allocator_available() { void request_extra_memory(size_t requested_size) { const size_t extra_mem_size = std::max(MIN_EXTRA_MEM_SIZE, requested_size); - auto &rt_alloc = RuntimeAllocator::current(); + auto &rt_alloc = RuntimeAllocator::get(); auto *extra_mem = rt_alloc.alloc_global_memory(extra_mem_size); rt_alloc.memory_resource.add_extra_memory(new (extra_mem) memory_resource::extra_memory_pool{extra_mem_size}); } @@ -31,7 +31,7 @@ RuntimeAllocator::RuntimeAllocator(size_t script_mem_size, size_t oom_handling_m memory_resource.init(buffer, script_mem_size, oom_handling_mem_size); } -RuntimeAllocator &RuntimeAllocator::current() noexcept { +RuntimeAllocator &RuntimeAllocator::get() noexcept { return get_component_context()->runtime_allocator; } diff --git a/runtime-light/component/component.cpp b/runtime-light/component/component.cpp index fa041b275a..3c695f0cb0 100644 
--- a/runtime-light/component/component.cpp +++ b/runtime-light/component/component.cpp @@ -43,7 +43,7 @@ int32_t merge_output_buffers() noexcept { } // namespace void ComponentState::init_script_execution() noexcept { - kphp_core_context.init(); + runtime_component_context.init(); init_php_scripts_in_each_worker(php_script_mutable_globals_singleton, main_task_); scheduler.suspend(std::make_pair(main_task_.get_handle(), WaitEvent::Rechedule{})); } diff --git a/runtime-light/component/component.h b/runtime-light/component/component.h index 68f21565a7..d557682ceb 100644 --- a/runtime-light/component/component.h +++ b/runtime-light/component/component.h @@ -19,8 +19,8 @@ #include "runtime-light/scheduler/scheduler.h" #include "runtime-light/server/http/http-server-context.h" #include "runtime-light/server/job-worker/job-worker-server-context.h" -#include "runtime-light/stdlib/curl/curl-context.h" #include "runtime-light/stdlib/crypto/crypto-context.h" +#include "runtime-light/stdlib/curl/curl-context.h" #include "runtime-light/stdlib/file/file-stream-context.h" #include "runtime-light/stdlib/fork/fork-context.h" #include "runtime-light/stdlib/job-worker/job-worker-client-context.h" @@ -109,7 +109,7 @@ struct ComponentState { Response response; PhpScriptMutableGlobals php_script_mutable_globals_singleton; - KphpCoreContext kphp_core_context; + RuntimeContext runtime_component_context; RpcComponentContext rpc_component_context; HttpServerComponentContext http_server_component_context{}; JobWorkerClientComponentContext job_worker_client_component_context{}; diff --git a/runtime-light/component/image.h b/runtime-light/component/image.h index 0dc98eb2b5..6c094708b2 100644 --- a/runtime-light/component/image.h +++ b/runtime-light/component/image.h @@ -4,9 +4,12 @@ #pragma once +#include "common/mixin/not_copyable.h" #include "runtime-light/stdlib/rpc/rpc-context.h" +#include "runtime-light/stdlib/string/string-context.h" -struct ImageState { +struct ImageState final : 
private vk::not_copyable { char *c_linear_mem; RpcImageState rpc_image_state; + StringImageState string_image_state; }; diff --git a/runtime-light/core/globals/php-script-globals.cpp b/runtime-light/core/globals/php-script-globals.cpp index afd86c307f..ce793acb2d 100644 --- a/runtime-light/core/globals/php-script-globals.cpp +++ b/runtime-light/core/globals/php-script-globals.cpp @@ -13,13 +13,13 @@ PhpScriptMutableGlobals &PhpScriptMutableGlobals::current() noexcept { void PhpScriptMutableGlobals::once_alloc_linear_mem(unsigned int n_bytes) { php_assert(g_linear_mem == nullptr); - g_linear_mem = static_cast(RuntimeAllocator::current().alloc0_global_memory(n_bytes)); + g_linear_mem = static_cast(RuntimeAllocator::get().alloc0_global_memory(n_bytes)); } void PhpScriptMutableGlobals::once_alloc_linear_mem(const char *lib_name, unsigned int n_bytes) { int64_t key_lib_name = string_hash(lib_name, strlen(lib_name)); php_assert(libs_linear_mem.find(key_lib_name) == libs_linear_mem.end()); - libs_linear_mem[key_lib_name] = static_cast(RuntimeAllocator::current().alloc0_global_memory(n_bytes)); + libs_linear_mem[key_lib_name] = static_cast(RuntimeAllocator::get().alloc0_global_memory(n_bytes)); } char *PhpScriptMutableGlobals::get_linear_mem(const char *lib_name) const { diff --git a/runtime-light/core/kphp-core-impl/kphp-core-context.cpp b/runtime-light/core/kphp-core-impl/kphp-core-context.cpp index dfda6b038c..e1e18fa55a 100644 --- a/runtime-light/core/kphp-core-impl/kphp-core-context.cpp +++ b/runtime-light/core/kphp-core-impl/kphp-core-context.cpp @@ -9,12 +9,12 @@ constexpr string_size_type initial_minimum_string_buffer_length = 1024; constexpr string_size_type initial_maximum_string_buffer_length = (1 << 24); -KphpCoreContext &KphpCoreContext::current() noexcept { - return get_component_context()->kphp_core_context; +RuntimeContext &RuntimeContext::get() noexcept { + return get_component_context()->runtime_component_context; } -void KphpCoreContext::init() { +void 
RuntimeContext::init() noexcept { init_string_buffer_lib(initial_minimum_string_buffer_length, initial_maximum_string_buffer_length); } -void KphpCoreContext::free() {} +void RuntimeContext::free() noexcept {} diff --git a/runtime-light/runtime-light.cmake b/runtime-light/runtime-light.cmake index 9b0341d3e4..1adeb09944 100644 --- a/runtime-light/runtime-light.cmake +++ b/runtime-light/runtime-light.cmake @@ -48,6 +48,11 @@ file( GLOB_RECURSE KPHP_RUNTIME_ALL_HEADERS RELATIVE ${BASE_DIR} CONFIGURE_DEPENDS "${RUNTIME_LIGHT_DIR}/*.h") +file( + GLOB_RECURSE KPHP_RUNTIME_COMMON_ALL_HEADERS + RELATIVE ${BASE_DIR} + CONFIGURE_DEPENDS "${RUNTIME_COMMON_DIR}/*.h") +list(APPEND KPHP_RUNTIME_ALL_HEADERS ${KPHP_RUNTIME_COMMON_ALL_HEADERS}) list(TRANSFORM KPHP_RUNTIME_ALL_HEADERS REPLACE "^(.+)$" [[#include "\1"]]) list(JOIN KPHP_RUNTIME_ALL_HEADERS "\n" MERGED_RUNTIME_HEADERS) file( diff --git a/runtime-light/stdlib/output/print-functions.h b/runtime-light/stdlib/output/print-functions.h index 048cbdda4a..0a002a1006 100644 --- a/runtime-light/stdlib/output/print-functions.h +++ b/runtime-light/stdlib/output/print-functions.h @@ -81,11 +81,3 @@ inline Optional f$fputcsv(const mixed &, const array &, string = inline int64_t f$printf(const string &, const array &) { php_critical_error("call to unsupported function"); } - -inline string f$sprintf(const string &, const array &) { - php_critical_error("call to unsupported function"); -} - -inline string f$vsprintf(const string &, const array &) { - php_critical_error("call to unsupported function"); -} diff --git a/runtime-light/stdlib/stdlib.cmake b/runtime-light/stdlib/stdlib.cmake index e2053779da..31d2714f42 100644 --- a/runtime-light/stdlib/stdlib.cmake +++ b/runtime-light/stdlib/stdlib.cmake @@ -21,7 +21,6 @@ prepend( rpc/rpc-tl-error.cpp rpc/rpc-tl-query.cpp rpc/rpc-tl-request.cpp - string/concat.cpp string/string-context.cpp system/system-context.cpp file/file-stream-context.cpp) diff --git 
a/runtime-light/stdlib/string/concat.cpp b/runtime-light/stdlib/string/concat.cpp deleted file mode 100644 index ae37321157..0000000000 --- a/runtime-light/stdlib/string/concat.cpp +++ /dev/null @@ -1,59 +0,0 @@ -// Compiler for PHP (aka KPHP) -// Copyright (c) 2024 LLC «V Kontakte» -// Distributed under the GPL v3 License, see LICENSE.notice.txt - -#include "runtime-light/stdlib/string/concat.h" - -#include "runtime-common/core/runtime-core.h" - -string str_concat(const string &s1, const string &s2) { - // for 2 argument concatenation it's not so uncommon to have at least one empty string argument; - // it happens in cases like `$prefix . $s` where $prefix could be empty depending on some condition - // real-world applications analysis shows that ~17.6% of all two arguments concatenations have - // at least one empty string argument - // - // checking both lengths for 0 is almost free, but when we step into those 17.6%, we get almost x10 - // faster concatenation and no heap allocations - // - // this idea is borrowed from the Go runtime - if (s1.empty()) { - return s2; - } - if (s2.empty()) { - return s1; - } - - const auto new_size{s1.size() + s2.size()}; - return string{new_size, true}.append_unsafe(s1).append_unsafe(s2).finish_append(); -} - -string str_concat(str_concat_arg s1, str_concat_arg s2) { - const auto new_size{s1.size + s2.size}; - return string{new_size, true}.append_unsafe(s1.as_tmp_string()).append_unsafe(s2.as_tmp_string()).finish_append(); -} - -string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3) { - const auto new_size{s1.size + s2.size + s3.size}; - return string{new_size, true}.append_unsafe(s1.as_tmp_string()).append_unsafe(s2.as_tmp_string()).append_unsafe(s3.as_tmp_string()).finish_append(); -} - -string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3, str_concat_arg s4) { - const auto new_size{s1.size + s2.size + s3.size + s4.size}; - return string{new_size, true} - 
.append_unsafe(s1.as_tmp_string()) - .append_unsafe(s2.as_tmp_string()) - .append_unsafe(s3.as_tmp_string()) - .append_unsafe(s4.as_tmp_string()) - .finish_append(); -} - -string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3, str_concat_arg s4, str_concat_arg s5) { - const auto new_size{s1.size + s2.size + s3.size + s4.size + s5.size}; - return string{new_size, true} - .append_unsafe(s1.as_tmp_string()) - .append_unsafe(s2.as_tmp_string()) - .append_unsafe(s3.as_tmp_string()) - .append_unsafe(s4.as_tmp_string()) - .append_unsafe(s5.as_tmp_string()) - .finish_append(); -} diff --git a/runtime-light/stdlib/string/concat.h b/runtime-light/stdlib/string/concat.h deleted file mode 100644 index d73949dd7f..0000000000 --- a/runtime-light/stdlib/string/concat.h +++ /dev/null @@ -1,48 +0,0 @@ -// Compiler for PHP (aka KPHP) -// Copyright (c) 2024 LLC «V Kontakte» -// Distributed under the GPL v3 License, see LICENSE.notice.txt - -#pragma once - -#include "runtime-common/core/runtime-core.h" - -// str_concat_arg generalizes both tmp_string and string arguments; -// it can be constructed from both of them, so concat functions can operate -// on both tmp_string and string types -// there is a special (string, string) overloading for concat2 to -// allow the empty string result optimization to kick in -struct str_concat_arg { - const char *data; - string::size_type size; - - str_concat_arg(const string &s) - : data{s.c_str()} - , size{s.size()} {} - str_concat_arg(tmp_string s) - : data{s.data} - , size{s.size} {} - - tmp_string as_tmp_string() const noexcept { - return {data, size}; - } -}; - -// str_concat functions implement efficient string-typed `.` (concatenation) operator implementation; -// apart from being machine-code size efficient (a function call is more compact), they're also -// usually faster as runtime is compiled with -O3 which is almost never the case for translated C++ code -// (it's either -O2 or -Os most of the time) -// -// we choose 
to have 4 functions (up to 5 arguments) because of the frequency distribution: -// 37619: 2 args -// 20616: 3 args -// 4534: 5 args -// 3791: 4 args -// 935: 7 args -// 565: 6 args -// 350: 9 args -// Both 6 and 7 argument combination already look infrequent enough to not bother -string str_concat(const string &s1, const string &s2); -string str_concat(str_concat_arg s1, str_concat_arg s2); -string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3); -string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3, str_concat_arg s4); -string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3, str_concat_arg s4, str_concat_arg s5); diff --git a/runtime-light/stdlib/string/string-context.cpp b/runtime-light/stdlib/string/string-context.cpp index 1509af2676..3c0c48c4b4 100644 --- a/runtime-light/stdlib/string/string-context.cpp +++ b/runtime-light/stdlib/string/string-context.cpp @@ -5,7 +5,13 @@ #include "runtime-light/stdlib/string/string-context.h" #include "runtime-light/component/component.h" +#include "runtime-light/component/image.h" +#include "runtime-light/utils/context.h" StringComponentContext &StringComponentContext::get() noexcept { return get_component_context()->string_component_context; } + +const StringImageState &StringImageState::get() noexcept { + return get_image_state()->string_image_state; +} diff --git a/runtime-light/stdlib/string/string-context.h b/runtime-light/stdlib/string/string-context.h index dccb6c38ed..09d462e8fe 100644 --- a/runtime-light/stdlib/string/string-context.h +++ b/runtime-light/stdlib/string/string-context.h @@ -4,13 +4,7 @@ #pragma once -#include +#include "runtime-common/stdlib/string/string-context.h" -#include "runtime-common/core/runtime-core.h" - -struct StringComponentContext { - int64_t str_replace_count_dummy{}; - double default_similar_text_percent_stub{}; - - static StringComponentContext &get() noexcept; -}; +using StringComponentContext = StringLibContext; +using 
StringImageState = StringLibConstants; diff --git a/runtime-light/stdlib/string/string-functions.h b/runtime-light/stdlib/string/string-functions.h deleted file mode 100644 index 50accbce6b..0000000000 --- a/runtime-light/stdlib/string/string-functions.h +++ /dev/null @@ -1,112 +0,0 @@ -// Compiler for PHP (aka KPHP) -// Copyright (c) 2024 LLC «V Kontakte» -// Distributed under the GPL v3 License, see LICENSE.notice.txt - -#pragma once - -#include - -#include "runtime-light/stdlib/string/string-context.h" - -inline int64_t f$strlen(const string &s) noexcept { - return s.size(); -} - -inline tmp_string f$_tmp_substr(const string &, int64_t, int64_t = std::numeric_limits::max()) { - php_critical_error("call to unsupported function"); -} - -inline tmp_string f$_tmp_substr(tmp_string, int64_t, int64_t = std::numeric_limits::max()) { - php_critical_error("call to unsupported function"); -} - -inline tmp_string f$_tmp_trim(tmp_string, const string & = string()) { - php_critical_error("call to unsupported function"); -} - -inline tmp_string f$_tmp_trim(const string &, const string & = string()) { - php_critical_error("call to unsupported function"); -} - -inline string f$trim(tmp_string, const string & = string()) { - php_critical_error("call to unsupported function"); -} - -inline string f$trim(const string &, const string & = string()) { - php_critical_error("call to unsupported function"); -} - -inline Optional f$substr(const string &, int64_t, int64_t = std::numeric_limits::max()) { - php_critical_error("call to unsupported function"); -} - -inline Optional f$substr(tmp_string, int64_t, int64_t = std::numeric_limits::max()) { - php_critical_error("call to unsupported function"); -} - -inline string f$pack(const string &, const array &) { - php_critical_error("call to unsupported function"); -} - -inline Optional> f$unpack(const string &, const string &) { - php_critical_error("call to unsupported function"); -} - -inline mixed f$str_replace(const mixed &, const mixed 
&, const mixed &, int64_t & = StringComponentContext::get().str_replace_count_dummy) { - php_critical_error("call to unsupported function"); -} - -inline string f$str_replace(const string &, const string &, const string &, int64_t & = StringComponentContext::get().str_replace_count_dummy) { - php_critical_error("call to unsupported function"); -} - -template -string f$str_replace(const array &, const array &, const string &, int64_t & = StringComponentContext::get().str_replace_count_dummy) { - php_critical_error("call to unsupported function"); -} - -inline string f$str_replace(const mixed &, const mixed &, const string &, int64_t & = StringComponentContext::get().str_replace_count_dummy) { - php_critical_error("call to unsupported function"); -} - -template> -SubjectT f$str_replace(const T1 &search, const T2 &replace, const SubjectT &subject, - int64_t &replace_count = StringComponentContext::get().str_replace_count_dummy) { - return f$str_replace(search, replace, subject.val(), replace_count); -} - -inline string f$str_ireplace(const string &, const string &, const string &, int64_t & = StringComponentContext::get().str_replace_count_dummy) { - php_critical_error("call to unsupported function"); -} - -template -string f$str_ireplace(const array &, const array &, const string &, int64_t & = StringComponentContext::get().str_replace_count_dummy) { - php_critical_error("call to unsupported function"); -} - -inline string f$str_ireplace(const mixed &, const mixed &, const string &, int64_t & = StringComponentContext::get().str_replace_count_dummy) { - php_critical_error("call to unsupported function"); -} - -template> -SubjectT f$str_ireplace(const T1 &search, const T2 &replace, const SubjectT &subject, - int64_t &replace_count = StringComponentContext::get().str_replace_count_dummy) { - return f$str_ireplace(search, replace, subject.val(), replace_count); -} - -inline mixed f$str_ireplace(const mixed &, const mixed &, const mixed &, int64_t & = 
StringComponentContext::get().str_replace_count_dummy) { - php_critical_error("call to unsupported function"); -} - -inline int64_t f$similar_text(const string &, const string &, double & = StringComponentContext::get().default_similar_text_percent_stub) { - php_critical_error("call to unsupported function"); -} -template -string f$strtr(const string &, const array &) { - php_critical_error("call to unsupported function"); -} - -template -Optional f$strpos(const string &, const Optional &, int64_t = 0) { - php_critical_error("call to unsupported function"); -} diff --git a/runtime-light/tl/tl-types.h b/runtime-light/tl/tl-types.h index 82c2fa2b0a..f59f1068a6 100644 --- a/runtime-light/tl/tl-types.h +++ b/runtime-light/tl/tl-types.h @@ -65,7 +65,7 @@ struct Maybe final { template struct vector final { using vector_t = memory_resource::stl::vector; - vector_t data{typename vector_t::allocator_type(RuntimeAllocator::current().memory_resource)}; + vector_t data{typename vector_t::allocator_type(RuntimeAllocator::get().memory_resource)}; using iterator = vector_t::iterator; using const_iterator = vector_t::const_iterator; diff --git a/runtime/array_functions.cpp b/runtime/array_functions.cpp index 1589dcb5c6..65aa7edf00 100644 --- a/runtime/array_functions.cpp +++ b/runtime/array_functions.cpp @@ -3,6 +3,7 @@ // Distributed under the GPL v3 License, see LICENSE.notice.txt #include "runtime/array_functions.h" +#include "runtime-common/stdlib/string/string-functions.h" template void walk_parts(const char *d, int64_t d_len, const string &str, int64_t limit, FN handle_part) { diff --git a/runtime/confdata-functions.cpp b/runtime/confdata-functions.cpp index 90d839d744..42e5bb7c1f 100644 --- a/runtime/confdata-functions.cpp +++ b/runtime/confdata-functions.cpp @@ -5,9 +5,8 @@ #include "runtime/confdata-functions.h" #include "common/algorithms/contains.h" - +#include "runtime-common/stdlib/string/string-functions.h" #include "runtime/confdata-global-manager.h" -#include 
"runtime/string_functions.h" namespace { diff --git a/runtime/context/runtime-context.cpp b/runtime/context/runtime-context.cpp index bc4bc48ffb..a220f9364c 100644 --- a/runtime/context/runtime-context.cpp +++ b/runtime/context/runtime-context.cpp @@ -2,20 +2,26 @@ // Copyright (c) 2024 LLC «V Kontakte» // Distributed under the GPL v3 License, see LICENSE.notice.txt -#include "runtime-context.h" +#include "runtime/context/runtime-context.h" -#include "common/kprintf.h" -#include "runtime/allocator.h" +#include "runtime-common/core/runtime-core.h" +#include "server/php-engine-vars.h" -KphpRuntimeContext kphp_runtime_context; +RuntimeContext kphp_runtime_context; RuntimeAllocator runtime_allocator; -void KphpRuntimeContext::init(void *mem, size_t script_mem_size, size_t oom_handling_mem_size) { - runtime_allocator.init(mem, script_mem_size, oom_handling_mem_size); - KphpCoreContext::init(); +RuntimeContext &RuntimeContext::get() noexcept { + return kphp_runtime_context; } -void KphpRuntimeContext::free() { - KphpCoreContext::free(); - runtime_allocator.free(); +void RuntimeContext::init() noexcept { + if (static_buffer_length_limit < 0) { + init_string_buffer_lib(266175, (1 << 24)); + } else { + init_string_buffer_lib(266175, static_buffer_length_limit); + } +} + +void RuntimeContext::free() noexcept { + free_migration_php8(); } diff --git a/runtime/context/runtime-context.h b/runtime/context/runtime-context.h index d94a0f802a..b498f8cf35 100644 --- a/runtime/context/runtime-context.h +++ b/runtime/context/runtime-context.h @@ -6,16 +6,5 @@ #include "runtime-common/core/runtime-core.h" -#include "common/smart_ptrs/singleton.h" - -struct KphpRuntimeContext : KphpCoreContext { - - void init(void *mem, size_t script_mem_size, size_t oom_handling_mem_size); - void free(); - - string_buffer static_SB; - string_buffer static_SB_spare; -}; - -extern KphpRuntimeContext kphp_runtime_context; +extern RuntimeContext kphp_runtime_context; extern RuntimeAllocator 
runtime_allocator; diff --git a/runtime/context/runtime-core-allocator.cpp b/runtime/context/runtime-core-allocator.cpp index c9002913a4..99c38d76fc 100644 --- a/runtime/context/runtime-core-allocator.cpp +++ b/runtime/context/runtime-core-allocator.cpp @@ -2,7 +2,6 @@ // Copyright (c) 2024 LLC «V Kontakte» // Distributed under the GPL v3 License, see LICENSE.notice.txt -#include "runtime-common/core/runtime-core.h" #include "runtime/allocator.h" #include "runtime/context/runtime-context.h" @@ -14,7 +13,7 @@ void RuntimeAllocator::free() { dl::free_script_allocator(); } -RuntimeAllocator &RuntimeAllocator::current() noexcept { +RuntimeAllocator &RuntimeAllocator::get() noexcept { return runtime_allocator; } diff --git a/runtime/context/runtime-core-context.cpp b/runtime/context/runtime-core-context.cpp deleted file mode 100644 index b14f1a0f4f..0000000000 --- a/runtime/context/runtime-core-context.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// Compiler for PHP (aka KPHP) -// Copyright (c) 2024 LLC «V Kontakte» -// Distributed under the GPL v3 License, see LICENSE.notice.txt - -#include "common/smart_ptrs/singleton.h" -#include "runtime/context/runtime-context.h" -#include "server/php-engine-vars.h" -#include "runtime/allocator.h" - -KphpCoreContext &KphpCoreContext::current() noexcept { - return kphp_runtime_context; -} - -void KphpCoreContext::init() { - if (static_buffer_length_limit < 0) { - init_string_buffer_lib(266175, (1 << 24)); - } else { - init_string_buffer_lib(266175, static_buffer_length_limit); - } -} - -void KphpCoreContext::free() { - free_migration_php8(); -} diff --git a/runtime/datetime/datetime_functions.cpp b/runtime/datetime/datetime_functions.cpp index 0bd9f0eede..7cfe7ce4d6 100644 --- a/runtime/datetime/datetime_functions.cpp +++ b/runtime/datetime/datetime_functions.cpp @@ -4,16 +4,15 @@ #include "runtime/datetime/datetime_functions.h" +#include #include #include #include -#include +#include "runtime-common/stdlib/string/string-context.h" #include 
"runtime/context/runtime-context.h" #include "runtime/critical_section.h" #include "runtime/datetime/timelib_wrapper.h" -#include "runtime/string_functions.h" -#include "server/server-log.h" extern long timezone; @@ -575,11 +574,11 @@ string f$strftime(const string &format, int64_t timestamp) { time_t timestamp_t = timestamp; localtime_r(&timestamp_t, &t); - if (!strftime(php_buf, PHP_BUF_LEN, format.c_str(), &t)) { + if (!strftime(StringLibContext::get().static_buf.data(), StringLibContext::STATIC_BUFFER_LENGTH, format.c_str(), &t)) { return {}; } - return string(php_buf); + return string(StringLibContext::get().static_buf.data()); } Optional f$strtotime(const string &time_str, int64_t timestamp) { diff --git a/runtime/files.cpp b/runtime/files.cpp index b33af43652..7a4fac7d86 100644 --- a/runtime/files.cpp +++ b/runtime/files.cpp @@ -16,13 +16,12 @@ #include "common/kernel-version.h" #include "common/macos-ports.h" #include "common/wrappers/mkdir_recursive.h" - +#include "runtime-common/stdlib/string/string-context.h" #include "runtime/context/runtime-context.h" #include "runtime/critical_section.h" #include "runtime/interface.h" #include "runtime/kphp_tracing.h" #include "runtime/streams.h" -#include "runtime/string_functions.h" //php_buf, TODO static int32_t opened_fd{-1}; @@ -176,11 +175,11 @@ bool f$copy(const string &from, const string &to) { size_t size = stat_buf.st_size; while (size > 0) { - size_t len = min(size, (size_t)PHP_BUF_LEN); - if (read_safe(read_fd, php_buf, len, from) < (ssize_t)len) { + size_t len = min(size, (size_t)StringLibContext::STATIC_BUFFER_LENGTH); + if (read_safe(read_fd, StringLibContext::get().static_buf.data(), len, from) < (ssize_t)len) { break; } - if (write_safe(write_fd, php_buf, len, to) < (ssize_t)len) { + if (write_safe(write_fd, StringLibContext::get().static_buf.data(), len, to) < (ssize_t)len) { break; } size -= len; @@ -781,13 +780,13 @@ static Optional file_fpassthru(const Stream &stream) { 
dl::enter_critical_section();//OK while (!feof(f)) { clearerr(f); - size_t res_size = fread(&php_buf[0], 1, PHP_BUF_LEN, f); + size_t res_size = fread(StringLibContext::get().static_buf.data(), 1, StringLibContext::STATIC_BUFFER_LENGTH, f); if (ferror(f)) { dl::leave_critical_section(); php_warning("Error happened during fpassthru from file \"%s\"", stream.to_string().c_str()); return false; } - print(php_buf, res_size); + print(StringLibContext::get().static_buf.data(), res_size); result += static_cast(res_size); } dl::leave_critical_section(); diff --git a/runtime/interface.cpp b/runtime/interface.cpp index 5281810390..cabdd79187 100644 --- a/runtime/interface.cpp +++ b/runtime/interface.cpp @@ -21,6 +21,8 @@ #include "common/wrappers/overloaded.h" #include "net/net-connections.h" +#include "runtime-common/stdlib/string/string-context.h" +#include "runtime-common/stdlib/string/string-functions.h" #include "runtime/array_functions.h" #include "runtime/bcmath.h" #include "runtime/confdata-functions.h" @@ -51,7 +53,6 @@ #include "runtime/resumable.h" #include "runtime/rpc.h" #include "runtime/streams.h" -#include "runtime/string_functions.h" #include "runtime/tcp.h" #include "runtime/thread-pool.h" #include "runtime/typed_rpc.h" @@ -958,7 +959,7 @@ class post_reader { buf_pos(0), boundary(boundary) { if (post == nullptr) { - buf = php_buf; + buf = StringLibContext::get().static_buf.data(); buf_len = 0; } else { buf = (char *)post; @@ -989,9 +990,10 @@ class post_reader { buf_pos += to_erase; i -= to_erase; - buf_len = to_leave + http_load_long_query(buf + to_leave, min(to_leave, left), min(PHP_BUF_LEN - to_leave, left)); + buf_len = + to_leave + http_load_long_query(buf + to_leave, min(to_leave, left), min(StringLibContext::STATIC_BUFFER_LENGTH - to_leave, left)); } else { - buf_len = http_load_long_query(buf, min(2 * chunk_size, left), min(PHP_BUF_LEN, left)); + buf_len = http_load_long_query(buf, min(2 * chunk_size, left), 
min(StringLibContext::STATIC_BUFFER_LENGTH, left)); } } @@ -1161,7 +1163,9 @@ class post_reader { buf_pos += to_erase; pos += to_write; - buf_len = to_leave + http_load_long_query(buf + to_leave, min(PHP_BUF_LEN - to_leave, left), min(PHP_BUF_LEN - to_leave, left)); + buf_len = to_leave + + http_load_long_query(buf + to_leave, min(StringLibContext::STATIC_BUFFER_LENGTH - to_leave, left), + min(StringLibContext::STATIC_BUFFER_LENGTH - to_leave, left)); } php_assert (s != nullptr); @@ -1724,8 +1728,8 @@ static void init_superglobals_impl(const http_query_data &http_data, const rpc_q if (!is_parsed) { int loaded = 0; while (loaded < http_data.post_len) { - int to_load = min(PHP_BUF_LEN, http_data.post_len - loaded); - http_load_long_query(php_buf, to_load, to_load); + int to_load = min(StringLibContext::STATIC_BUFFER_LENGTH, http_data.post_len - loaded); + http_load_long_query(StringLibContext::get().static_buf.data(), to_load, to_load); loaded += to_load; } } @@ -2301,7 +2305,7 @@ static void init_interface_lib() { finished = false; php_warning_level = std::max(2, php_warning_minimum_level); - KphpCoreContext::current().php_disable_warnings = 0; + RuntimeContext::get().php_disable_warnings = 0; is_json_log_on_timeout_enabled = true; is_demangled_stacktrace_logs_enabled = false; ignore_level = 0; @@ -2450,7 +2454,8 @@ void global_init_script_allocator() { } void init_runtime_environment(const php_query_data_t &data, PhpScriptBuiltInSuperGlobals &superglobals, void *mem, size_t script_mem_size, size_t oom_handling_mem_size) { - kphp_runtime_context.init(mem, script_mem_size, oom_handling_mem_size); + runtime_allocator.init(mem, script_mem_size, oom_handling_mem_size); + kphp_runtime_context.init(); reset_global_interface_vars(superglobals); init_runtime_libs(); init_superglobals(data, superglobals); @@ -2461,6 +2466,7 @@ void free_runtime_environment(PhpScriptBuiltInSuperGlobals &superglobals) { free_runtime_libs(); reset_global_interface_vars(superglobals); 
kphp_runtime_context.free(); + runtime_allocator.free(); } void worker_global_init(WorkerType worker_type) noexcept { diff --git a/runtime/json-functions.cpp b/runtime/json-functions.cpp index fcb920602f..b4584092d6 100644 --- a/runtime/json-functions.cpp +++ b/runtime/json-functions.cpp @@ -5,9 +5,7 @@ #include "runtime/json-functions.h" #include "common/algorithms/find.h" - -#include "runtime/exception.h" -#include "runtime/string_functions.h" +#include "runtime-common/stdlib/string/string-functions.h" // note: json-functions.cpp is used for non-typed json implementation: for json_encode() and json_decode() // for classes, e.g. `JsonEncoder::encode(new A)`, see json-writer.cpp and from/to visitors diff --git a/runtime/kphp_tracing.cpp b/runtime/kphp_tracing.cpp index 9355556188..9bd528dd40 100644 --- a/runtime/kphp_tracing.cpp +++ b/runtime/kphp_tracing.cpp @@ -3,16 +3,16 @@ // Distributed under the GPL v3 License, see LICENSE.notice.txt #include "runtime/kphp_tracing.h" -#include "runtime/kphp_tracing_binlog.h" #include +#include "runtime-common/stdlib/string/string-functions.h" #include "runtime/critical_section.h" -#include "runtime/job-workers/job-interface.h" #include "runtime/interface.h" +#include "runtime/job-workers/job-interface.h" +#include "runtime/kphp_tracing_binlog.h" #include "runtime/math_functions.h" #include "runtime/resumable.h" -#include "runtime/string_functions.h" #include "runtime/tl/rpc_function.h" #include "runtime/tl/tl_magics_decoding.h" diff --git a/runtime/kphp_tracing_binlog.cpp b/runtime/kphp_tracing_binlog.cpp index 03c65672ea..3020fc9c29 100644 --- a/runtime/kphp_tracing_binlog.cpp +++ b/runtime/kphp_tracing_binlog.cpp @@ -9,13 +9,11 @@ #include #include -#include "runtime/critical_section.h" +#include "runtime-common/stdlib/string/string-context.h" #include "runtime/allocator.h" +#include "runtime/critical_section.h" #include "server/json-logger.h" - -extern const char lhex_digits[17]; - namespace kphp_tracing { // when updating 
binlog protocol (and decoder), don't forget to bump this version @@ -245,8 +243,8 @@ void tracing_binary_buffer::output_to_json_log(const char *json_without_binlog) for (const auto &it : chunks_ordered) { for (const unsigned char *p = reinterpret_cast(it->buf), *end = p + it->size_bytes; p != end; ++p) { - buffer[buffer_i++] = lhex_digits[(*p & 0xF0) >> 4]; - buffer[buffer_i++] = lhex_digits[(*p & 0x0F)]; + buffer[buffer_i++] = StringLibConstants::get().lhex_digits[(*p & 0xF0) >> 4]; + buffer[buffer_i++] = StringLibConstants::get().lhex_digits[(*p & 0x0F)]; } } diff --git a/runtime/mail.cpp b/runtime/mail.cpp index 3d7012f174..3706330d1e 100644 --- a/runtime/mail.cpp +++ b/runtime/mail.cpp @@ -7,9 +7,9 @@ #include +#include "runtime-common/stdlib/string/string-functions.h" #include "runtime/critical_section.h" #include "runtime/interface.h" -#include "runtime/string_functions.h" static bool check_header(const string &str) { int str_len = (int)str.size(); diff --git a/runtime/math_functions.cpp b/runtime/math_functions.cpp index bbb41c88f6..928b5ad87a 100644 --- a/runtime/math_functions.cpp +++ b/runtime/math_functions.cpp @@ -2,6 +2,8 @@ // Copyright (c) 2020 LLC «V Kontakte» // Distributed under the GPL v3 License, see LICENSE.notice.txt +#include "runtime/math_functions.h" + #include #include #include @@ -15,10 +17,10 @@ #endif #include "common/cycleclock.h" -#include "runtime/math_functions.h" -#include "runtime/exception.h" +#include "runtime-common/stdlib/string/string-context.h" +#include "runtime-common/stdlib/string/string-functions.h" +#include "runtime/allocator.h" #include "runtime/critical_section.h" -#include "runtime/string_functions.h" #include "server/php-engine-vars.h" namespace { @@ -83,7 +85,7 @@ string f$dechex(int64_t number) noexcept { int i = 16; do { - s[--i] = lhex_digits[v & 15]; + s[--i] = StringLibConstants::get().lhex_digits[v & 15]; v >>= 4; } while (v > 0); diff --git a/runtime/mbstring.cpp b/runtime/mbstring.cpp index 
8fa5a03be6..53f9e462ff 100644 --- a/runtime/mbstring.cpp +++ b/runtime/mbstring.cpp @@ -1,11 +1,12 @@ // Compiler for PHP (aka KPHP) -// Copyright (c) 2020 LLC «V Kontakte» +// Copyright (c) 2024 LLC «V Kontakte» // Distributed under the GPL v3 License, see LICENSE.notice.txt #include "runtime/mbstring.h" #include "common/unicode/unicode-utils.h" #include "common/unicode/utf8-utils.h" +#include "runtime-common/stdlib/string/string-functions.h" static bool is_detect_incorrect_encoding_names_warning{false}; @@ -18,7 +19,7 @@ void free_detect_incorrect_encoding_names() { } static int mb_detect_encoding_new(const string &encoding) { - const auto encoding_name = f$strtolower(encoding).c_str(); + const auto *encoding_name = f$strtolower(encoding).c_str(); if (!strcmp(encoding_name, "cp1251") || !strcmp(encoding_name, "cp-1251") || !strcmp(encoding_name, "windows-1251")) { return 1251; diff --git a/runtime/mbstring.h b/runtime/mbstring.h index 7ef6d72b38..bcb101b933 100644 --- a/runtime/mbstring.h +++ b/runtime/mbstring.h @@ -1,29 +1,33 @@ // Compiler for PHP (aka KPHP) -// Copyright (c) 2020 LLC «V Kontakte» +// Copyright (c) 2024 LLC «V Kontakte» // Distributed under the GPL v3 License, see LICENSE.notice.txt #pragma once #include +#include #include "runtime-common/core/runtime-core.h" -#include "runtime/string_functions.h" +#include "runtime-common/stdlib/string/string-context.h" bool mb_UTF8_check(const char *s); -bool f$mb_check_encoding(const string &str, const string &encoding = CP1251); +bool f$mb_check_encoding(const string &str, const string &encoding = StringLibConstants::get().CP1251_STR); -int64_t f$mb_strlen(const string &str, const string &encoding = CP1251); +int64_t f$mb_strlen(const string &str, const string &encoding = StringLibConstants::get().CP1251_STR); -string f$mb_strtolower(const string &str, const string &encoding = CP1251); +string f$mb_strtolower(const string &str, const string &encoding = StringLibConstants::get().CP1251_STR); -string 
f$mb_strtoupper(const string &str, const string &encoding = CP1251); +string f$mb_strtoupper(const string &str, const string &encoding = StringLibConstants::get().CP1251_STR); -Optional f$mb_strpos(const string &haystack, const string &needle, int64_t offset = 0, const string &encoding = CP1251) noexcept; +Optional f$mb_strpos(const string &haystack, const string &needle, int64_t offset = 0, + const string &encoding = StringLibConstants::get().CP1251_STR) noexcept; -Optional f$mb_stripos(const string &haystack, const string &needle, int64_t offset = 0, const string &encoding = CP1251) noexcept; +Optional f$mb_stripos(const string &haystack, const string &needle, int64_t offset = 0, + const string &encoding = StringLibConstants::get().CP1251_STR) noexcept; -string f$mb_substr(const string &str, int64_t start, const mixed &length = std::numeric_limits::max(), const string &encoding = CP1251); +string f$mb_substr(const string &str, int64_t start, const mixed &length = std::numeric_limits::max(), + const string &encoding = StringLibConstants::get().CP1251_STR); void f$set_detect_incorrect_encoding_names_warning(bool show); diff --git a/runtime/openssl.cpp b/runtime/openssl.cpp index ef6635596b..571fab2e31 100644 --- a/runtime/openssl.cpp +++ b/runtime/openssl.cpp @@ -27,9 +27,10 @@ #include "common/wrappers/openssl.h" #include "common/wrappers/string_view.h" #include "common/wrappers/to_array.h" - -#include "runtime/array_functions.h" +#include "runtime-common/stdlib/string/string-context.h" +#include "runtime-common/stdlib/string/string-functions.h" #include "runtime/allocator.h" +#include "runtime/array_functions.h" #include "runtime/critical_section.h" #include "runtime/datetime/datetime_functions.h" #include "runtime/files.h" @@ -50,8 +51,8 @@ struct HashTraits { if (!raw_output) { for (int64_t i = hash_len - 1; i >= 0; i--) { - res[2 * i + 1] = lhex_digits[res[i] & 15]; - res[2 * i] = lhex_digits[(res[i] >> 4) & 15]; + res[2 * i + 1] = 
StringLibConstants::get().lhex_digits[res[i] & 15]; + res[2 * i] = StringLibConstants::get().lhex_digits[(res[i] >> 4) & 15]; } } return res; @@ -168,15 +169,15 @@ Optional f$md5_file(const string &file_name, bool raw_output) { size_t size = stat_buf.st_size; while (size > 0) { - size_t len = min(size, (size_t)PHP_BUF_LEN); - if (read_safe(read_fd, php_buf, len, file_name) < (ssize_t)len) { + size_t len = min(size, (size_t)StringLibContext::STATIC_BUFFER_LENGTH); + if (read_safe(read_fd, StringLibContext::get().static_buf.data(), len, file_name) < (ssize_t)len) { break; } - php_assert (MD5_Update(&c, static_cast (php_buf), (unsigned long)len) == 1); + php_assert (MD5_Update(&c, static_cast (StringLibContext::get().static_buf.data()), (unsigned long)len) == 1); size -= len; } close(read_fd); - php_assert (MD5_Final(reinterpret_cast (php_buf), &c) == 1); + php_assert (MD5_Final(reinterpret_cast (StringLibContext::get().static_buf.data()), &c) == 1); critical_section.leave_critical_section(); if (size > 0) { @@ -187,12 +188,12 @@ Optional f$md5_file(const string &file_name, bool raw_output) { if (!raw_output) { string res(32, false); for (int i = 15; i >= 0; i--) { - res[2 * i + 1] = lhex_digits[php_buf[i] & 15]; - res[2 * i] = lhex_digits[(php_buf[i] >> 4) & 15]; + res[2 * i + 1] = StringLibConstants::get().lhex_digits[StringLibContext::get().static_buf.data()[i] & 15]; + res[2 * i] = StringLibConstants::get().lhex_digits[(StringLibContext::get().static_buf.data()[i] >> 4) & 15]; } return res; } else { - return string(php_buf, 16); + return string(StringLibContext::get().static_buf.data(), 16); } } @@ -222,11 +223,11 @@ int64_t f$crc32_file(const string &file_name) { uint32_t res = std::numeric_limits::max(); size_t size = stat_buf.st_size; while (size > 0) { - size_t len = min(size, (size_t)PHP_BUF_LEN); - if (read_safe(read_fd, php_buf, len, file_name) < (ssize_t)len) { + size_t len = min(size, (size_t)StringLibContext::STATIC_BUFFER_LENGTH); + if 
(read_safe(read_fd, StringLibContext::get().static_buf.data(), len, file_name) < (ssize_t)len) { break; } - res = crc32_partial(php_buf, (int)len, res); + res = crc32_partial(StringLibContext::get().static_buf.data(), (int)len, res); size -= len; } close(read_fd); @@ -373,11 +374,11 @@ bool f$openssl_public_encrypt(const string &data, string &result, const string & } int key_size = EVP_PKEY_size(pkey); - php_assert (PHP_BUF_LEN >= key_size); + php_assert (StringLibContext::STATIC_BUFFER_LENGTH >= key_size); RSA_ptr rsa{EVP_PKEY_get1_RSA(pkey)}; if (RSA_public_encrypt(static_cast(data.size()), reinterpret_cast(data.c_str()), - reinterpret_cast(php_buf), rsa.get(), RSA_PKCS1_PADDING) != key_size) { + reinterpret_cast(StringLibContext::get().static_buf.data()), rsa.get(), RSA_PKCS1_PADDING) != key_size) { if (!from_cache) { EVP_PKEY_free(pkey); } @@ -389,7 +390,7 @@ bool f$openssl_public_encrypt(const string &data, string &result, const string & if (!from_cache) { EVP_PKEY_free(pkey); } - result = string(php_buf, key_size); + result = string(StringLibContext::get().static_buf.data(), key_size); return true; } @@ -423,11 +424,11 @@ bool f$openssl_private_decrypt(const string &data, string &result, const string } int key_size = EVP_PKEY_size(pkey); - php_assert (PHP_BUF_LEN >= key_size); + php_assert (StringLibContext::STATIC_BUFFER_LENGTH >= key_size); RSA_ptr rsa{EVP_PKEY_get1_RSA(pkey)}; int len = RSA_private_decrypt(static_cast(data.size()), reinterpret_cast(data.c_str()), - reinterpret_cast(php_buf), rsa.get(), RSA_PKCS1_PADDING); + reinterpret_cast(StringLibContext::get().static_buf.data()), rsa.get(), RSA_PKCS1_PADDING); if (!from_cache) { EVP_PKEY_free(pkey); } @@ -437,7 +438,7 @@ bool f$openssl_private_decrypt(const string &data, string &result, const string return false; } - result.assign(php_buf, len); + result.assign(StringLibContext::get().static_buf.data(), len); return true; } diff --git a/runtime/php_assert.cpp b/runtime/php_assert.cpp index 
109e2c44fc..03f6629707 100644 --- a/runtime/php_assert.cpp +++ b/runtime/php_assert.cpp @@ -92,7 +92,7 @@ static void print_demangled_adresses(void **buffer, int nptrs, int num_shift, bo } static void php_warning_impl(bool out_of_memory, int error_type, char const *message, va_list args) { - if (php_warning_level == 0 || KphpCoreContext::current().php_disable_warnings) { + if (php_warning_level == 0 || RuntimeContext::get().php_disable_warnings) { return; } diff --git a/runtime/runtime.cmake b/runtime/runtime.cmake index 22f523838e..f0acdc1f4e 100644 --- a/runtime/runtime.cmake +++ b/runtime/runtime.cmake @@ -66,7 +66,6 @@ prepend(KPHP_RUNTIME_SOURCES ${BASE_DIR}/runtime/ ${KPHP_RUNTIME_PDO_PGSQL_SOURCES} allocator.cpp context/runtime-core-allocator.cpp - context/runtime-core-context.cpp context/runtime-context.cpp array_functions.cpp bcmath.cpp @@ -114,6 +113,7 @@ prepend(KPHP_RUNTIME_SOURCES ${BASE_DIR}/runtime/ storage.cpp streams.cpp string_functions.cpp + string-context.cpp tl/rpc_req_error.cpp tl/rpc_tl_query.cpp tl/rpc_response.cpp @@ -174,11 +174,11 @@ file(GLOB_RECURSE KPHP_RUNTIME_ALL_HEADERS RELATIVE ${BASE_DIR} CONFIGURE_DEPENDS "${BASE_DIR}/runtime/*.h") -file(GLOB_RECURSE KPHP_RUNTIME_CORE_ALL_HEADERS +file(GLOB_RECURSE KPHP_RUNTIME_COMMON_ALL_HEADERS RELATIVE ${BASE_DIR} CONFIGURE_DEPENDS "${BASE_DIR}/runtime-common/*.h") -list(APPEND KPHP_RUNTIME_ALL_HEADERS ${KPHP_RUNTIME_CORE_ALL_HEADERS}) +list(APPEND KPHP_RUNTIME_ALL_HEADERS ${KPHP_RUNTIME_COMMON_ALL_HEADERS}) list(TRANSFORM KPHP_RUNTIME_ALL_HEADERS REPLACE "^(.+)$" [[#include "\1"]]) list(JOIN KPHP_RUNTIME_ALL_HEADERS "\n" MERGED_RUNTIME_HEADERS) file(WRITE ${AUTO_DIR}/runtime/runtime-headers.h "\ diff --git a/runtime/streams.cpp b/runtime/streams.cpp index 3a9c47ad0d..c06e129268 100644 --- a/runtime/streams.cpp +++ b/runtime/streams.cpp @@ -8,8 +8,9 @@ #include #include -#include "runtime/array_functions.h" +#include "runtime-common/stdlib/string/string-functions.h" #include 
"runtime/allocator.h" +#include "runtime/array_functions.h" #include "runtime/critical_section.h" static string::size_type max_wrapper_name_size = 0; diff --git a/runtime/string-context.cpp b/runtime/string-context.cpp new file mode 100644 index 0000000000..cf7d07e630 --- /dev/null +++ b/runtime/string-context.cpp @@ -0,0 +1,17 @@ +// Compiler for PHP (aka KPHP) +// Copyright (c) 2024 LLC «V Kontakte» +// Distributed under the GPL v3 License, see LICENSE.notice.txt + +#include "runtime-common/stdlib/string/string-context.h" + +static StringLibContext string_lib_context; + +StringLibContext &StringLibContext::get() noexcept { + return string_lib_context; +} + +const static StringLibConstants string_lib_constants; + +const StringLibConstants &StringLibConstants::get() noexcept { + return string_lib_constants; +} diff --git a/runtime/string_functions.cpp b/runtime/string_functions.cpp index 18756a4512..0b450b759e 100644 --- a/runtime/string_functions.cpp +++ b/runtime/string_functions.cpp @@ -1,999 +1,25 @@ // Compiler for PHP (aka KPHP) -// Copyright (c) 2020 LLC «V Kontakte» +// Copyright (c) 2024 LLC «V Kontakte» // Distributed under the GPL v3 License, see LICENSE.notice.txt #include "runtime/string_functions.h" -#include -#include -#include - -#include "common/macos-ports.h" #include "common/unicode/unicode-utils.h" - -#include "runtime/context/runtime-context.h" +#include "runtime-common/stdlib/string/string-functions.h" #include "runtime/interface.h" - -// For "f$str_getcsv" support #include "runtime/streams.h" -const string COLON(",", 1); -const string CP1251("cp1251"); -const string DOT(".", 1); -const string NEW_LINE("\n", 1); -const string SPACE(" ", 1); -const string WHAT(" \n\r\t\v\0", 6); - -static const string ONE("1", 1); -static const string PERCENT("%", 1); - -char php_buf[PHP_BUF_LEN + 1]; - -const char lhex_digits[17] = "0123456789abcdef"; -const char uhex_digits[17] = "0123456789ABCDEF"; - -int64_t str_replace_count_dummy; - -static inline const 
char *get_mask(const string &what) { - static char mask[256]; - memset(mask, 0, 256); - - int len = what.size(); - for (int i = 0; i < len; i++) { - unsigned char c = what[i]; - if (what[i + 1] == '.' && what[i + 2] == '.' && (unsigned char)what[i + 3] >= c) { - memset(mask + c, 1, (unsigned char)what[i + 3] - c + 1); - i += 3; - } else if (c == '.' && what[i + 1] == '.') { - php_warning("Invalid '..'-range in string \"%s\" at position %d.", what.c_str(), i); - } else { - mask[c] = 1; - } - } - - return mask; -} - -string f$addcslashes(const string &str, const string &what) { - const char *mask = get_mask(what); - - int len = str.size(); - kphp_runtime_context.static_SB.clean().reserve(4 * len); - - for (int i = 0; i < len; i++) { - unsigned char c = str[i]; - if (mask[c]) { - kphp_runtime_context.static_SB.append_char('\\'); - if (c < 32 || c > 126) { - switch (c) { - case '\n': - kphp_runtime_context.static_SB.append_char('n'); - break; - case '\t': - kphp_runtime_context.static_SB.append_char('t'); - break; - case '\r': - kphp_runtime_context.static_SB.append_char('r'); - break; - case '\a': - kphp_runtime_context.static_SB.append_char('a'); - break; - case '\v': - kphp_runtime_context.static_SB.append_char('v'); - break; - case '\b': - kphp_runtime_context.static_SB.append_char('b'); - break; - case '\f': - kphp_runtime_context.static_SB.append_char('f'); - break; - default: - kphp_runtime_context.static_SB.append_char(static_cast((c >> 6) + '0')); - kphp_runtime_context.static_SB.append_char(static_cast(((c >> 3) & 7) + '0')); - kphp_runtime_context.static_SB.append_char(static_cast((c & 7) + '0')); - } - } else { - kphp_runtime_context.static_SB.append_char(c); - } - } else { - kphp_runtime_context.static_SB.append_char(c); - } - } - return kphp_runtime_context.static_SB.str(); -} - -string f$addslashes(const string &str) { - int len = str.size(); - - kphp_runtime_context.static_SB.clean().reserve(2 * len); - for (int i = 0; i < len; i++) { - switch (str[i]) 
{ - case '\0': - kphp_runtime_context.static_SB.append_char('\\'); - kphp_runtime_context.static_SB.append_char('0'); - break; - case '\'': - case '\"': - case '\\': - kphp_runtime_context.static_SB.append_char('\\'); - /* fallthrough */ - default: - kphp_runtime_context.static_SB.append_char(str[i]); - } - } - return kphp_runtime_context.static_SB.str(); -} - -string f$bin2hex(const string &str) { - int len = str.size(); - string result(2 * len, false); - - for (int i = 0; i < len; i++) { - result[2 * i] = lhex_digits[(str[i] >> 4) & 15]; - result[2 * i + 1] = lhex_digits[str[i] & 15]; - } - - return result; -} - -string f$chop(const string &s, const string &what) { - return f$rtrim(s, what); -} - -string f$chr(int64_t v) { - return {1, static_cast(v)}; -} - -static const unsigned char win_to_koi[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, - 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, - 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, - 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, - 154, 174, 190, 46, 159, 189, 46, 46, 179, 191, 180, 157, 46, 46, 156, 183, - 46, 46, 182, 166, 173, 46, 46, 158, 163, 152, 164, 155, 46, 46, 46, 167, - 225, 226, 247, 231, 228, 229, 246, 250, 233, 234, 235, 236, 237, 238, 239, 240, - 242, 243, 244, 245, 230, 232, 227, 254, 251, 253, 255, 249, 248, 252, 224, 241, - 193, 194, 215, 199, 196, 197, 214, 218, 201, 202, 203, 204, 205, 206, 207, 208, - 210, 211, 212, 213, 198, 200, 195, 222, 219, 221, 223, 217, 216, 220, 192, 209}; - -static const unsigned 
char koi_to_win[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, - 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 184, 186, 32, 179, 191, 32, 32, 32, 32, 32, 180, 162, 32, - 32, 32, 32, 168, 170, 32, 178, 175, 32, 32, 32, 32, 32, 165, 161, 169, - 254, 224, 225, 246, 228, 229, 244, 227, 245, 232, 233, 234, 235, 236, 237, 238, - 239, 255, 240, 241, 242, 243, 230, 226, 252, 251, 231, 248, 253, 249, 247, 250, - 222, 192, 193, 214, 196, 197, 212, 195, 213, 200, 201, 202, 203, 204, 205, 206, - 207, 223, 208, 209, 210, 211, 198, 194, 220, 219, 199, 216, 221, 217, 215, 218}; - -string f$convert_cyr_string(const string &str, const string &from_s, const string &to_s) { - char from = (char)toupper(from_s[0]); - char to = (char)toupper(to_s[0]); - - const unsigned char *table = nullptr; - if (from == 'W' && to == 'K') { - table = win_to_koi; - } - if (from == 'K' && to == 'W') { - table = koi_to_win; - } - if (table == nullptr) { - php_critical_error ("unsupported conversion from '%c' to '%c' in function convert_cyr_string", from, to); - return str; - } - - int len = str.size(); - string result(len, false); - for (int i = 0; i < len; i++) { - result[i] = table[(unsigned char)str[i]]; - } - return result; -} - -mixed f$count_chars(const string &str, int64_t mode) { - int64_t chars[256] = {0}; - - if (static_cast(mode) > 4u) { - php_warning("Unknown mode %" PRIi64, mode); - return false; - 
} - - const string::size_type len = str.size(); - for (string::size_type i = 0; i < len; i++) { - chars[static_cast(str[i])]++; - } - - if (mode <= 2) { - array result; - for (int64_t i = 0; i < 256; i++) { - if ((mode != 2 && chars[i] != 0) || - (mode != 1 && chars[i] == 0)) { - result.set_value(i, chars[i]); - } - } - return result; - } - - string result; - for (int i = 0; i < 256; i++) { - if ((mode == 3) == (chars[i] != 0)) { - result.push_back(char(i)); - } - } - return result; -} - -string f$hex2bin(const string &str) { - int len = str.size(); - if (len & 1) { - php_warning("Wrong argument \"%s\" supplied for function hex2bin", str.c_str()); - return {}; - } - - string result(len / 2, false); - for (int i = 0; i < len; i += 2) { - int num_high = hex_to_int(str[i]); - int num_low = hex_to_int(str[i + 1]); - if (num_high == 16 || num_low == 16) { - php_warning("Wrong argument \"%s\" supplied for function hex2bin", str.c_str()); - return {}; - } - result[i / 2] = (char)((num_high << 4) + num_low); - } - - return result; -} - -static const int entities_size = 251; - -static const char *ent_to_num_s[entities_size] = { - "AElig", "Aacute", "Acirc", "Agrave", "Alpha", "Aring", "Atilde", "Auml", "Beta", "Ccedil", - "Chi", "Dagger", "Delta", "ETH", "Eacute", "Ecirc", "Egrave", "Epsilon", "Eta", "Euml", - "Gamma", "Iacute", "Icirc", "Igrave", "Iota", "Iuml", "Kappa", "Lambda", "Mu", "Ntilde", - "Nu", "OElig", "Oacute", "Ocirc", "Ograve", "Omega", "Omicron", "Oslash", "Otilde", "Ouml", - "Phi", "Pi", "Prime", "Psi", "Rho", "Scaron", "Sigma", "THORN", "Tau", "Theta", - "Uacute", "Ucirc", "Ugrave", "Upsilon", "Uuml", "Xi", "Yacute", "Yuml", "Zeta", "aacute", - "acirc", "acute", "aelig", "agrave", "alefsym", "alpha", "amp", "and", "ang", "aring", - "asymp", "atilde", "auml", "bdquo", "beta", "brvbar", "bull", "cap", "ccedil", "cedil", - "cent", "chi", "circ", "clubs", "cong", "copy", "crarr", "cup", "curren", "dArr", - "dagger", "darr", "deg", "delta", "diams", "divide", 
"eacute", "ecirc", "egrave", "empty", - "emsp", "ensp", "epsilon", "equiv", "eta", "eth", "euml", "euro", "exist", "fnof", - "forall", "frac12", "frac14", "frac34", "frasl", "gamma", "ge", "gt", "hArr", "harr", - "hearts", "hellip", "iacute", "icirc", "iexcl", "igrave", "image", "infin", "int", "iota", - "iquest", "isin", "iuml", "kappa", "lArr", "lambda", "lang", "laquo", "larr", "lceil", - "ldquo", "le", "lfloor", "lowast", "loz", "lrm", "lsaquo", "lsquo", "lt", "macr", - "mdash", "micro", "middot", "minus", "mu", "nabla", "nbsp", "ndash", "ne", "ni", - "not", "notin", "nsub", "ntilde", "nu", "oacute", "ocirc", "oelig", "ograve", "oline", - "omega", "omicron", "oplus", "or", "ordf", "ordm", "oslash", "otilde", "otimes", "ouml", - "para", "part", "permil", "perp", "phi", "pi", "piv", "plusmn", "pound", "prime", - "prod", "prop", "psi", "rArr", "radic", "rang", "raquo", "rarr", "rceil", - "rdquo", "real", "reg", "rfloor", "rho", "rlm", "rsaquo", "rsquo", "sbquo", "scaron", - "sdot", "sect", "shy", "sigma", "sigmaf", "sim", "spades", "sub", "sube", "sum", - "sup", "sup1", "sup2", "sup3", "supe", "szlig", "tau", "there4", "theta", "thetasym", - "thinsp", "thorn", "tilde", "times", "trade", "uArr", "uacute", "uarr", "ucirc", "ugrave", - "uml", "upsih", "upsilon", "uuml", "weierp", "xi", "yacute", "yen", "yuml", "zeta", - "zwj", "zwnj"}; - -static int ent_to_num_i[entities_size] = { - 198, 193, 194, 192, 913, 197, 195, 196, 914, 199, 935, 8225, 916, 208, 201, 202, 200, 917, 919, 203, - 915, 205, 206, 204, 921, 207, 922, 923, 924, 209, 925, 338, 211, 212, 210, 937, 927, 216, 213, 214, - 934, 928, 8243, 936, 929, 352, 931, 222, 932, 920, 218, 219, 217, 933, 220, 926, 221, 376, 918, 225, - 226, 180, 230, 224, 8501, 945, 38, 8743, 8736, 229, 8776, 227, 228, 8222, 946, 166, 8226, 8745, 231, 184, - 162, 967, 710, 9827, 8773, 169, 8629, 8746, 164, 8659, 8224, 8595, 176, 948, 9830, 247, 233, 234, 232, 8709, - 8195, 8194, 949, 8801, 951, 240, 235, 8364, 8707, 402, 8704, 189, 
188, 190, 8260, 947, 8805, 62, 8660, 8596, - 9829, 8230, 237, 238, 161, 236, 8465, 8734, 8747, 953, 191, 8712, 239, 954, 8656, 955, 9001, 171, 8592, 8968, - 8220, 8804, 8970, 8727, 9674, 8206, 8249, 8216, 60, 175, 8212, 181, 183, 8722, 956, 8711, 160, 8211, 8800, 8715, - 172, 8713, 8836, 241, 957, 243, 244, 339, 242, 8254, 969, 959, 8853, 8744, 170, 186, 248, 245, 8855, 246, - 182, 8706, 8240, 8869, 966, 960, 982, 177, 163, 8242, 8719, 8733, 968, 8658, 8730, 9002, 187, 8594, 8969, - 8221, 8476, 174, 8971, 961, 8207, 8250, 8217, 8218, 353, 8901, 167, 173, 963, 962, 8764, 9824, 8834, 8838, 8721, - 8835, 185, 178, 179, 8839, 223, 964, 8756, 952, 977, 8201, 254, 732, 215, 8482, 8657, 250, 8593, 251, 249, - 168, 978, 965, 252, 8472, 958, 253, 165, 255, 950, 8205, 8204}; -/* -static int cp1251_to_utf8[128] = { - 0x402, 0x403, 0x201A, 0x453, 0x201E, 0x2026, 0x2020, 0x2021, 0x20AC, 0x2030, 0x409, 0x2039, 0x40A, 0x40C, 0x40B, 0x40F, - 0x452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x0, 0x2122, 0x459, 0x203A, 0x45A, 0x45C, 0x45B, 0x45F, - 0xA0, 0x40E, 0x45E, 0x408, 0xA4, 0x490, 0xA6, 0xA7, 0x401, 0xA9, 0x404, 0xAB, 0xAC, 0xAD, 0xAE, 0x407, - 0xB0, 0xB1, 0x406, 0x456, 0x491, 0xB5, 0xB6, 0xB7, 0x451, 0x2116, 0x454, 0xBB, 0x458, 0x405, 0x455, 0x457, - 0x410, 0x411, 0x412, 0x413, 0x414, 0x415, 0x416, 0x417, 0x418, 0x419, 0x41A, 0x41B, 0x41C, 0x41D, 0x41E, 0x41F, - 0x420, 0x421, 0x422, 0x423, 0x424, 0x425, 0x426, 0x427, 0x428, 0x429, 0x42A, 0x42B, 0x42C, 0x42D, 0x42E, 0x42F, - 0x430, 0x431, 0x432, 0x433, 0x434, 0x435, 0x436, 0x437, 0x438, 0x439, 0x43A, 0x43B, 0x43C, 0x43D, 0x43E, 0x43F, - 0x440, 0x441, 0x442, 0x443, 0x444, 0x445, 0x446, 0x447, 0x448, 0x449, 0x44A, 0x44B, 0x44C, 0x44D, 0x44E, 0x44F}; -*/ -static const char *cp1251_to_utf8_str[128] = { - "Ђ", "Ѓ", "‚", "ѓ", "„", "…", "†", "‡", "€", "‰", "Љ", "‹", "Њ", "Ќ", - "Ћ", "Џ", - "ђ", "‘", "’", "‛", "“", "•", "–", "—", "", "™", "љ", "›", "њ", "ќ", "ћ", - "џ", - " ", "Ў", "ў", "Ј", "¤", "Ґ", "¦", "§", "Ё", 
"©", "Є", "«", "¬", "­", "®", - "Ї", - "°", "±", "І", "і", "ґ", "µ", "¶", "·", "ё", "№", "є", "»", "ј", "Ѕ", - "ѕ", "ї", - "А", "Б", "В", "Г", "Д", "Е", "Ж", "З", "И", "Й", "К", "Л", "М", "Н", - "О", "П", - "Р", "С", "Т", "У", "Ф", "Х", "Ц", "Ч", "Ш", "Щ", "Ъ", "Ы", "Ь", "Э", - "Ю", "Я", - "а", "б", "в", "г", "д", "е", "ж", "з", "и", "й", "к", "л", "м", "н", - "о", "п", - "р", "с", "т", "у", "ф", "х", "ц", "ч", "ш", "щ", "ъ", "ы", "ь", "э", - "ю", "я"}; - -string f$htmlentities(const string &str) { - int len = (int)str.size(); - kphp_runtime_context.static_SB.clean().reserve(8 * len); - - for (int i = 0; i < len; i++) { - switch (str[i]) { - case '&': - kphp_runtime_context.static_SB.append_char('&'); - kphp_runtime_context.static_SB.append_char('a'); - kphp_runtime_context.static_SB.append_char('m'); - kphp_runtime_context.static_SB.append_char('p'); - kphp_runtime_context.static_SB.append_char(';'); - break; - case '"': - kphp_runtime_context.static_SB.append_char('&'); - kphp_runtime_context.static_SB.append_char('q'); - kphp_runtime_context.static_SB.append_char('u'); - kphp_runtime_context.static_SB.append_char('o'); - kphp_runtime_context.static_SB.append_char('t'); - kphp_runtime_context.static_SB.append_char(';'); - break; - case '<': - kphp_runtime_context.static_SB.append_char('&'); - kphp_runtime_context.static_SB.append_char('l'); - kphp_runtime_context.static_SB.append_char('t'); - kphp_runtime_context.static_SB.append_char(';'); - break; - case '>': - kphp_runtime_context.static_SB.append_char('&'); - kphp_runtime_context.static_SB.append_char('g'); - kphp_runtime_context.static_SB.append_char('t'); - kphp_runtime_context.static_SB.append_char(';'); - break; - default: - if (str[i] < 0) { - const char *utf8_str = cp1251_to_utf8_str[128 + str[i]]; - kphp_runtime_context.static_SB.append_unsafe(utf8_str, static_cast(strlen(utf8_str))); - } else { - kphp_runtime_context.static_SB.append_char(str[i]); - } - } - } - - return 
kphp_runtime_context.static_SB.str(); -} - -string f$html_entity_decode(const string &str, int64_t flags, const string &encoding) { - if (flags >= 3) { - php_critical_error ("unsupported parameter flags = %" PRIi64 " in function html_entity_decode", flags); - } - - bool utf8 = memchr(encoding.c_str(), '8', encoding.size()) != nullptr; - if (!utf8 && strstr(encoding.c_str(), "1251") == nullptr) { - php_critical_error ("unsupported encoding \"%s\" in function html_entity_decode", encoding.c_str()); - return str; - } - - int len = str.size(); - string res(len * 7 / 4 + 4, false); - char *p = &res[0]; - for (int i = 0; i < len; i++) { - if (str[i] == '&') { - int j = i + 1; - while (j < len && str[j] != ';') { - j++; - } - if (j < len) { - if ((flags & ENT_QUOTES) && j == i + 5) { - if (str[i + 1] == '#' && str[i + 2] == '0' && str[i + 3] == '3' && str[i + 4] == '9') { - i += 5; - *p++ = '\''; - continue; - } - } - if (!(flags & ENT_NOQUOTES) && j == i + 5) { - if (str[i + 1] == 'q' && str[i + 2] == 'u' && str[i + 3] == 'o' && str[i + 4] == 't') { - i += 5; - *p++ = '\"'; - continue; - } - } - - int l = 0, r = entities_size; - while (l + 1 < r) { - int m = (l + r) >> 1; - if (strncmp(str.c_str() + i + 1, ent_to_num_s[m], j - i - 1) < 0) { - r = m; - } else { - l = m; - } - } - if (strncmp(str.c_str() + i + 1, ent_to_num_s[l], j - i - 1) == 0) { - int num = ent_to_num_i[l]; - i = j; - if (utf8) { - if (num < 128) { - *p++ = (char)num; - } else if (num < 0x800) { - *p++ = (char)(0xc0 + (num >> 6)); - *p++ = (char)(0x80 + (num & 63)); - } else { - *p++ = (char)(0xe0 + (num >> 12)); - *p++ = (char)(0x80 + ((num >> 6) & 63)); - *p++ = (char)(0x80 + (num & 63)); - } - } else { - if (num < 128) { - *p++ = (char)num; - } else { - *p++ = '&'; - *p++ = '#'; - if (num >= 1000) { - *p++ = (char)(num / 1000 % 10 + '0'); - } - *p++ = (char)(num / 100 % 10 + '0'); - *p++ = (char)(num / 10 % 10 + '0'); - *p++ = (char)(num % 10 + '0'); - *p++ = ';'; - } - } - continue; - } - } - } - - 
*p++ = str[i]; - } - res.shrink(static_cast(p - res.c_str())); - - return res; -} - -string f$htmlspecialchars(const string &str, int64_t flags) { - if (flags >= 3) { - php_critical_error ("unsupported parameter flags = %" PRIi64 " in function htmlspecialchars", flags); - } - - const string::size_type len = str.size(); - kphp_runtime_context.static_SB.clean().reserve(6 * len); - - for (string::size_type i = 0; i < len; i++) { - switch (str[i]) { - case '&': - kphp_runtime_context.static_SB.append_char('&'); - kphp_runtime_context.static_SB.append_char('a'); - kphp_runtime_context.static_SB.append_char('m'); - kphp_runtime_context.static_SB.append_char('p'); - kphp_runtime_context.static_SB.append_char(';'); - break; - case '"': - if (!(flags & ENT_NOQUOTES)) { - kphp_runtime_context.static_SB.append_char('&'); - kphp_runtime_context.static_SB.append_char('q'); - kphp_runtime_context.static_SB.append_char('u'); - kphp_runtime_context.static_SB.append_char('o'); - kphp_runtime_context.static_SB.append_char('t'); - kphp_runtime_context.static_SB.append_char(';'); - } else { - kphp_runtime_context.static_SB.append_char('"'); - } - break; - case '\'': - if (flags & ENT_QUOTES) { - kphp_runtime_context.static_SB.append_char('&'); - kphp_runtime_context.static_SB.append_char('#'); - kphp_runtime_context.static_SB.append_char('0'); - kphp_runtime_context.static_SB.append_char('3'); - kphp_runtime_context.static_SB.append_char('9'); - kphp_runtime_context.static_SB.append_char(';'); - } else { - kphp_runtime_context.static_SB.append_char('\''); - } - break; - case '<': - kphp_runtime_context.static_SB.append_char('&'); - kphp_runtime_context.static_SB.append_char('l'); - kphp_runtime_context.static_SB.append_char('t'); - kphp_runtime_context.static_SB.append_char(';'); - break; - case '>': - kphp_runtime_context.static_SB.append_char('&'); - kphp_runtime_context.static_SB.append_char('g'); - kphp_runtime_context.static_SB.append_char('t'); - 
kphp_runtime_context.static_SB.append_char(';'); - break; - default: - kphp_runtime_context.static_SB.append_char(str[i]); - } - } - - return kphp_runtime_context.static_SB.str(); -} - -string f$htmlspecialchars_decode(const string &str, int64_t flags) { - if (flags >= 3) { - php_critical_error ("unsupported parameter flags = %" PRIi64 " in function htmlspecialchars_decode", flags); - } - - int len = str.size(); - string res(len, false); - char *p = &res[0]; - for (int i = 0; i < len;) { - if (str[i] == '&') { - if (str[i + 1] == 'a' && str[i + 2] == 'm' && str[i + 3] == 'p' && str[i + 4] == ';') { - *p++ = '&'; - i += 5; - } else if (str[i + 1] == 'q' && str[i + 2] == 'u' && str[i + 3] == 'o' && str[i + 4] == 't' && str[i + 5] == ';' && !(flags & ENT_NOQUOTES)) { - *p++ = '"'; - i += 6; - } else if (str[i + 1] == '#' && str[i + 2] == '0' && str[i + 3] == '3' && str[i + 4] == '9' && str[i + 5] == ';' && (flags & ENT_QUOTES)) { - *p++ = '\''; - i += 6; - } else if (str[i + 1] == 'l' && str[i + 2] == 't' && str[i + 3] == ';') { - *p++ = '<'; - i += 4; - } else if (str[i + 1] == 'g' && str[i + 2] == 't' && str[i + 3] == ';') { - *p++ = '>'; - i += 4; - } else { - *p++ = '&'; - i++; - } - } else { - *p++ = str[i]; - i++; - } - } - res.shrink(static_cast(p - res.c_str())); - - return res; -} - -string f$lcfirst(const string &str) { - int n = str.size(); - if (n == 0) { - return str; - } - - string res(n, false); - res[0] = (char)tolower(str[0]); - memcpy(&res[1], &str[1], n - 1); - - return res; -} - -int64_t f$levenshtein(const string &str1, const string &str2) { - string::size_type len1 = str1.size(); - string::size_type len2 = str2.size(); - - const string::size_type MAX_LEN = 16384; - if (len1 > MAX_LEN || len2 > MAX_LEN) { - php_warning("Too long strings of length %u and %u supplied for function levenshtein. 
Maximum allowed length is %u.", len1, len2, MAX_LEN); - if (len1 > MAX_LEN) { - len1 = MAX_LEN; - } - if (len2 > MAX_LEN) { - len2 = MAX_LEN; - } - } - - int64_t dp[2][MAX_LEN + 1]; - - for (string::size_type j = 0; j <= len2; j++) { - dp[0][j] = j; - } - - for (string::size_type i = 1; i <= len1; i++) { - dp[i & 1][0] = i; - for (string::size_type j = 1; j <= len2; j++) { - if (str1[i - 1] == str2[j - 1]) { - dp[i & 1][j] = dp[(i - 1) & 1][j - 1]; - } else { - int64_t res = dp[(i - 1) & 1][j - 1]; - if (dp[(i - 1) & 1][j] < res) { - res = dp[(i - 1) & 1][j]; - } - if (dp[i & 1][j - 1] < res) { - res = dp[i & 1][j - 1]; - } - dp[i & 1][j] = res + 1; - } - } - } - return dp[len1 & 1][len2]; -} - -string f$ltrim(const string &s, const string &what) { - const char *mask = get_mask(what); - - int len = (int)s.size(); - if (len == 0 || !mask[(unsigned char)s[0]]) { - return s; - } - - int l = 1; - while (l < len && mask[(unsigned char)s[l]]) { - l++; - } - return {s.c_str() + l, static_cast(len - l)}; -} - -string f$mysql_escape_string(const string &str) { - int len = str.size(); - kphp_runtime_context.static_SB.clean().reserve(2 * len); - for (int i = 0; i < len; i++) { - switch (str[i]) { - case '\0': - case '\n': - case '\r': - case 26: - case '\'': - case '\"': - case '\\': - kphp_runtime_context.static_SB.append_char('\\'); - /* fallthrough */ - default: - kphp_runtime_context.static_SB.append_char(str[i]); - } - } - return kphp_runtime_context.static_SB.str(); -} - -string f$nl2br(const string &str, bool is_xhtml) { - const char *br = is_xhtml ? "
" : "
"; - int br_len = (int)strlen(br); - - int len = str.size(); - kphp_runtime_context.static_SB.clean().reserve((br_len + 1) * len); - - for (int i = 0; i < len;) { - if (str[i] == '\n' || str[i] == '\r') { - kphp_runtime_context.static_SB.append_unsafe(br, br_len); - if (str[i] + str[i + 1] == '\n' + '\r') { - kphp_runtime_context.static_SB.append_char(str[i++]); - } - } - kphp_runtime_context.static_SB.append_char(str[i++]); - } - - return kphp_runtime_context.static_SB.str(); -} - -string f$number_format(double number, int64_t decimals, const string &dec_point, const string &thousands_sep) { - char *result_begin = php_buf + PHP_BUF_LEN; - - if (decimals < 0 || decimals > 100) { - php_warning("Wrong parameter decimals (%" PRIi64 ") in function number_format", decimals); - return {}; - } - bool negative = false; - if (number < 0) { - negative = true; - number *= -1; - } - - double frac = number - floor(number); - number -= frac; - - double mul = pow(10.0, (double)decimals); - frac = round(frac * mul + 1e-9); - - int64_t old_decimals = decimals; - while (result_begin > php_buf && decimals--) { - double x = floor(frac * 0.1 + 0.05); - auto y = static_cast(frac - x * 10 + 0.05); - if ((unsigned int)y >= 10u) { - y = 0; - } - frac = x; - - *--result_begin = (char)(y + '0'); - } - number += frac; - - if (old_decimals > 0) { - string::size_type i = dec_point.size(); - while (result_begin > php_buf && i > 0) { - *--result_begin = dec_point[--i]; - } - } - - int64_t digits = 0; - do { - if (digits && digits % 3 == 0) { - string::size_type i = thousands_sep.size(); - while (result_begin > php_buf && i > 0) { - *--result_begin = thousands_sep[--i]; - } - } - digits++; - - if (result_begin > php_buf) { - double x = floor(number * 0.1 + 0.05); - auto y = static_cast((number - x * 10 + 0.05)); - if ((unsigned int)y >= 10u) { - y = 0; - } - number = x; - - *--result_begin = (char)(y + '0'); - } - } while (result_begin > php_buf && number > 0.5); - - if (result_begin > php_buf && 
negative) { - *--result_begin = '-'; - } - - if (result_begin <= php_buf) { - php_critical_error ("maximum length of result (%d) exceeded", PHP_BUF_LEN); - return {}; - } - - return {result_begin, static_cast(php_buf + PHP_BUF_LEN - result_begin)}; -} - -int64_t f$ord(const string &s) { - return (unsigned char)s[0]; -} - -static uint64_t float64_bits(double f) { - uint64_t bits = 0; - std::memcpy(&bits, &f, sizeof(uint64_t)); - return bits; -} - -static double float64_from_bits(uint64_t bits) { - double f = 0; - std::memcpy(&f, &bits, sizeof(uint64_t)); - return f; -} - -string f$pack(const string &pattern, const array &a) { - kphp_runtime_context.static_SB.clean(); - int cur_arg = 0; - for (int i = 0; i < (int)pattern.size();) { - if (pattern[i] == '*') { - if (i > 0) { - --i; - } - } - char format = pattern[i++]; - int cnt = 1; - if ('0' <= pattern[i] && pattern[i] <= '9') { - cnt = 0; - do { - cnt = cnt * 10 + pattern[i++] - '0'; - } while ('0' <= pattern[i] && pattern[i] <= '9'); - - if (cnt <= 0) { - php_warning("Wrong count specifier in pattern \"%s\"", pattern.c_str()); - return {}; - } - } else if (pattern[i] == '*') { - cnt = 0; - } - - int arg_num = cur_arg; - if (arg_num >= a.count()) { - if (format == 'A' || format == 'a' || format == 'H' || format == 'h' || cnt != 0) { - php_warning("Not enough parameters to call function pack"); - return {}; - } - if (i + 1 != (int)pattern.size()) { - php_warning("Misplaced symbol '*' in pattern \"%s\"", pattern.c_str()); - return {}; - } - break; - } - cur_arg++; - - mixed arg = a.get_value(arg_num); - - if (arg.is_array()) { - php_warning("Argument %d of function pack is array", arg_num); - return {}; - } - - char filler = 0; - switch (format) { - case 'A': - filler = ' '; - /* fallthrough */ - case 'a': { - string arg_str = arg.to_string(); - int len = arg_str.size(); - if (!cnt) { - cnt = len; - i++; - } - kphp_runtime_context.static_SB.append(arg_str.c_str(), static_cast(min(cnt, len))); - while (cnt > len) { - 
kphp_runtime_context.static_SB << filler; - cnt--; - } - break; - } - case 'h': - case 'H': { - string arg_str = arg.to_string(); - int len = arg_str.size(); - if (!cnt) { - cnt = len; - i++; - } - for (int j = 0; cnt > 0 && j < len; j += 2) { - int num_high = hex_to_int(arg_str[j]); - int num_low = cnt > 1 ? hex_to_int(arg_str[j + 1]) : 0; - cnt -= 2; - if (num_high == 16 || num_low == 16) { - php_warning("Wrong argument \"%s\" supplied for format '%c' in function pack", arg_str.c_str(), format); - return {}; - } - if (format == 'H') { - kphp_runtime_context.static_SB << (char)((num_high << 4) + num_low); - } else { - kphp_runtime_context.static_SB << (char)((num_low << 4) + num_high); - } - } - if (cnt > 0) { - php_warning("Type %c: not enough characters in string \"%s\" in function pack", format, arg_str.c_str()); - } - break; - } - - default: - do { - switch (format) { - case 'c': - case 'C': - kphp_runtime_context.static_SB << (char)(arg.to_int()); - break; - case 's': - case 'S': - case 'v': { - unsigned short value = (short)arg.to_int(); - kphp_runtime_context.static_SB.append((const char *)&value, 2); - break; - } - case 'n': { - unsigned short value = (short)arg.to_int(); - kphp_runtime_context.static_SB - << (char)(value >> 8) - << (char)(value & 255); - break; - } - case 'i': - case 'I': - case 'l': - case 'L': - case 'V': { - auto value = static_cast(arg.to_int()); - kphp_runtime_context.static_SB.append((const char *)&value, 4); - break; - } - case 'N': { - auto value = static_cast(arg.to_int()); - kphp_runtime_context.static_SB - << (char)(value >> 24) - << (char)((value >> 16) & 255) - << (char)((value >> 8) & 255) - << (char)(value & 255); - break; - } - case 'f': { - float value = (float)arg.to_float(); - kphp_runtime_context.static_SB.append((const char *)&value, sizeof(float)); - break; - } - case 'e': - case 'E': - case 'd': { - double value = arg.to_float(); - uint64_t value_byteordered = float64_bits(value); - if (format == 'e') { - 
value_byteordered = htole64(value_byteordered); - } else if (format == 'E') { - value_byteordered = htobe64(value_byteordered); - } - kphp_runtime_context.static_SB.append((const char *)&value_byteordered, sizeof(uint64_t)); - break; - } - case 'J': - case 'P': - case 'Q': { - // stored in the host machine order by the default (Q flag) - unsigned long long value_byteordered = static_cast(arg.to_string().to_int()); - if (format == 'P') { - // for P encode in little endian order - value_byteordered = htole64(value_byteordered); - } else if (format == 'J') { - // for J encode in big endian order - value_byteordered = htobe64(value_byteordered); - } - - kphp_runtime_context.static_SB.append((const char *)&value_byteordered, sizeof(unsigned long long)); - break; - } - case 'q': { - int64_t value = arg.to_string().to_int(); - kphp_runtime_context.static_SB.append((const char *)&value, sizeof(long long)); - break; - } - default: - php_warning("Format code \"%c\" not supported", format); - return {}; - } - - if (cnt > 1) { - arg_num = cur_arg++; - if (arg_num >= a.count()) { - php_warning("Not enough parameters to call function pack"); - return {}; - } - - arg = a.get_value(arg_num); - - if (arg.is_array()) { - php_warning("Argument %d of function pack is array", arg_num); - return {}; - } - } - } while (--cnt > 0); - } - } - - php_assert (cur_arg <= a.count()); - if (cur_arg < a.count()) { - php_warning("Too much arguments to call pack with format \"%s\"", pattern.c_str()); - } - - return kphp_runtime_context.static_SB.str(); -} - -string f$prepare_search_query(const string &query) { - const char *s = clean_str(query.c_str()); - if (s == nullptr) { - s = ""; - } - return string(s); -} - -int64_t f$printf(const string &format, const array &a) { +int64_t f$printf(const string &format, const array &a) noexcept { string to_print = f$sprintf(format, a); print(to_print); return to_print.size(); } -string f$rtrim(const string &s, const string &what) { - const char *mask = 
get_mask(what); - - int len = (int)s.size() - 1; - if (len == -1 || !mask[(unsigned char)s[len]]) { - return s; - } - - while (len > 0 && mask[(unsigned char)s[len - 1]]) { - len--; - } - - return {s.c_str(), static_cast(len)}; +int64_t f$vprintf(const string &format, const array &args) noexcept { + return f$printf(format, args); } -Optional f$setlocale(int64_t category, const string &locale) { +Optional f$setlocale(int64_t category, const string &locale) noexcept { const char *loc = locale.c_str(); if (locale[0] == '0' && locale.size() == 1) { loc = nullptr; @@ -1005,1957 +31,16 @@ Optional f$setlocale(int64_t category, const string &locale) { return string(res); } -string f$sprintf(const string &format, const array &a) { - string result; - result.reserve_at_least(format.size()); - int cur_arg = 0; - bool error_too_big = false; - for (int i = 0; i < (int)format.size(); i++) { - if (format[i] != '%') { - result.push_back(format[i]); - continue; - } - i++; - - int parsed_arg_num = 0, j; - for (j = i; '0' <= format[j] && format[j] <= '9'; j++) { - parsed_arg_num = parsed_arg_num * 10 + format[j] - '0'; - } - int arg_num = -2; - if (format[j] == '$') { - i = j + 1; - arg_num = parsed_arg_num - 1; - } - - char sign = 0; - if (format[i] == '+') { - sign = format[i++]; - } - - char filler = ' '; - if (format[i] == '0' || format[i] == ' ') { - filler = format[i++]; - } else if (format[i] == '\'') { - i++; - filler = format[i++]; - } - - int pad_right = false; - if (format[i] == '-') { - pad_right = true; - i++; - } - - int width = 0; - while ('0' <= format[i] && format[i] <= '9' && width < PHP_BUF_LEN) { - width = width * 10 + format[i++] - '0'; - } - - if (width >= PHP_BUF_LEN) { - error_too_big = true; - break; - } - - int precision = -1; - if (format[i] == '.' 
&& '0' <= format[i + 1] && format[i + 1] <= '9') { - precision = format[i + 1] - '0'; - i += 2; - while ('0' <= format[i] && format[i] <= '9' && precision < PHP_BUF_LEN) { - precision = precision * 10 + format[i++] - '0'; - } - } - - if (precision >= PHP_BUF_LEN) { - error_too_big = true; - break; - } - - string piece; - if (format[i] == '%') { - piece = PERCENT; - } else { - if (arg_num == -2) { - arg_num = cur_arg++; - } - - if (arg_num >= a.count()) { - php_warning("Not enough parameters to call function sprintf with format \"%s\"", format.c_str()); - return {}; - } - - if (arg_num == -1) { - php_warning("Wrong parameter number 0 specified in function sprintf with format \"%s\"", format.c_str()); - return {}; - } - - const mixed &arg = a.get_value(arg_num); - - if (arg.is_array()) { - php_warning("Argument %d of function sprintf is array", arg_num); - return {}; - } - - switch (format[i]) { - case 'b': { - auto arg_int = static_cast(arg.to_int()); - int cur_pos = 70; - do { - php_buf[--cur_pos] = (char)((arg_int & 1) + '0'); - arg_int >>= 1; - } while (arg_int > 0); - piece.assign(php_buf + cur_pos, 70 - cur_pos); - break; - } - case 'c': { - int64_t arg_int = arg.to_int(); - if (arg_int <= -128 || arg_int > 255) { - php_warning("Wrong parameter for specifier %%c in function sprintf with format \"%s\"", format.c_str()); - } - piece.assign(1, (char)arg_int); - break; - } - case 'd': { - int64_t arg_int = arg.to_int(); - if (sign == '+' && arg_int >= 0) { - piece = (kphp_runtime_context.static_SB.clean() << "+" << arg_int).str(); - } else { - piece = string(arg_int); - } - break; - } - case 'u': { - auto arg_int = static_cast(arg.to_int()); - int cur_pos = 70; - do { - php_buf[--cur_pos] = (char)(arg_int % 10 + '0'); - arg_int /= 10; - } while (arg_int > 0); - piece.assign(php_buf + cur_pos, 70 - cur_pos); - break; - } - case 'e': - case 'E': - case 'f': - case 'F': - case 'g': - case 'G': { - double arg_float = arg.to_float(); - - 
kphp_runtime_context.static_SB.clean() << '%'; - if (sign) { - kphp_runtime_context.static_SB << sign; - } - if (precision >= 0) { - kphp_runtime_context.static_SB << '.' << precision; - } - kphp_runtime_context.static_SB << format[i]; - - int len = snprintf(php_buf, PHP_BUF_LEN, kphp_runtime_context.static_SB.c_str(), arg_float); - if (len >= PHP_BUF_LEN) { - error_too_big = true; - break; - } - - piece.assign(php_buf, len); - break; - } - case 'o': { - auto arg_int = static_cast(arg.to_int()); - int cur_pos = 70; - do { - php_buf[--cur_pos] = (char)((arg_int & 7) + '0'); - arg_int >>= 3; - } while (arg_int > 0); - piece.assign(php_buf + cur_pos, 70 - cur_pos); - break; - } - case 's': { - string arg_string = arg.to_string(); - - kphp_runtime_context.static_SB.clean() << '%'; - if (precision >= 0) { - kphp_runtime_context.static_SB << '.' << precision; - } - kphp_runtime_context.static_SB << 's'; - - int len = snprintf(php_buf, PHP_BUF_LEN, kphp_runtime_context.static_SB.c_str(), arg_string.c_str()); - if (len >= PHP_BUF_LEN) { - error_too_big = true; - break; - } - - piece.assign(php_buf, len); - break; - } - case 'x': - case 'X': { - const char *hex_digits = (format[i] == 'x' ? 
lhex_digits : uhex_digits); - auto arg_int = static_cast(arg.to_int()); - - int cur_pos = 70; - do { - php_buf[--cur_pos] = hex_digits[arg_int & 15]; - arg_int >>= 4; - } while (arg_int > 0); - piece.assign(php_buf + cur_pos, 70 - cur_pos); - break; - } - default: - php_warning("Unsupported specifier %%%c in sprintf with format \"%s\"", format[i], format.c_str()); - return {}; - } - } - - result.append(f$str_pad(piece, width, string(1, filler), pad_right)); - } - - if (error_too_big) { - php_warning("Too big result in function sprintf"); - return {}; - } - - return result; -} - -string f$stripcslashes(const string &str) { - if (str.empty()) { - return str; - } - - // this implementation is an adapted version from php-src - - auto len = str.size(); - auto new_len = len; - string result(len, false); - char *result_c_str = &result[0]; - char num_tmp[4]; // we need up to three digits + a space for null-terminator - int j = 0; - - for (int i = 0; i < len; i++) { - if (str[i] != '\\' || i + 1 >= len) { - *result_c_str++ = str[i]; - } else { - i++; // step over a backslash - switch (str[i]) { - case 'n': - *result_c_str++ = '\n'; - new_len--; - break; - case 'r': - *result_c_str++ = '\r'; - new_len--; - break; - case 'a': - *result_c_str++ = '\a'; - new_len--; - break; - case 't': - *result_c_str++ = '\t'; - new_len--; - break; - case 'v': - *result_c_str++ = '\v'; - new_len--; - break; - case 'b': - *result_c_str++ = '\b'; - new_len--; - break; - case 'f': - *result_c_str++ = '\f'; - new_len--; - break; - case '\\': - *result_c_str++ = '\\'; - new_len--; - break; - case 'x': // \\xN or \\xNN - // collect up to two hex digits and interpret them as char - if (i+1 < len && isxdigit(static_cast(str[i+1]))) { - num_tmp[0] = str[++i]; - if (i+1 < len && isxdigit(static_cast(str[i+1]))) { - num_tmp[1] = str[++i]; - num_tmp[2] = '\0'; - new_len -= 3; - } else { - num_tmp[1] = '\0'; - new_len -= 2; - } - *result_c_str++ = static_cast(strtol(num_tmp, nullptr, 16)); - } else { - // 
not a hex literal, just copy a char as i - *result_c_str++ = str[i]; - new_len--; - } - break; - default: // \N \NN \NNN - // collect up to three octal digits and interpret them as char - j = 0; - while (i < len && str[i] >= '0' && str[i] <= '7' && j < 3) { - num_tmp[j++] = str[i++]; - } - if (j) { - num_tmp[j] ='\0'; - *result_c_str++ = static_cast(strtol(num_tmp, nullptr, 8)); - new_len -= j; - i--; - } else { - // not an octal literal, just copy a char as is - *result_c_str++ = str[i]; - new_len--; - } - } - } - } - - if (new_len != 0) { - *result_c_str = '\0'; - } - result.shrink(new_len); - return result; -} - -string f$stripslashes(const string &str) { - int len = str.size(); - int i; - - string result(len, false); - char *result_c_str = &result[0]; - for (i = 0; i + 1 < len; i++) { - if (str[i] == '\\') { - i++; - if (str[i] == '0') { - *result_c_str++ = '\0'; - continue; - } - } - - *result_c_str++ = str[i]; - } - if (i + 1 == len && str[i] != '\\') { - *result_c_str++ = str[i]; - } - result.shrink(static_cast(result_c_str - result.c_str())); - return result; -} - -int64_t f$strcasecmp(const string &lhs, const string &rhs) { - int n = min(lhs.size(), rhs.size()); - for (int i = 0; i < n; i++) { - if (tolower(lhs[i]) != tolower(rhs[i])) { - return tolower(lhs[i]) - tolower(rhs[i]); - } - } - // TODO: for PHP8.2, use <=> operator instead: - // return spaceship(static_cast(lhs.size()), static_cast(rhs.size())); - return static_cast(lhs.size()) - static_cast(rhs.size()); -} - -int64_t f$strcmp(const string &lhs, const string &rhs) { - return lhs.compare(rhs); -} - -Optional f$stripos(const string &haystack, const string &needle, int64_t offset) { - if (offset < 0) { - php_warning("Wrong offset = %" PRIi64 " in function stripos", offset); - return false; - } - if (offset >= haystack.size()) { - return false; - } - if (needle.size() == 0) { - php_warning("Parameter needle is empty in function stripos"); - return false; - } - - const char *s = 
strcasestr(haystack.c_str() + offset, needle.c_str()); +string f$prepare_search_query(const string &query) noexcept { + const char *s = clean_str(query.c_str()); if (s == nullptr) { - return false; - } - return s - haystack.c_str(); -} - -static bool php_tag_find(const string &tag, const string &allow) { - if (tag.empty() || allow.empty()) { - return false; - } - - string norm; - int state = 0, done = 0; - for (int i = 0; tag[i] && !done; i++) { - char c = (char)tolower(tag[i]); - switch (c) { - case '<': - norm.push_back(c); - break; - case '>': - done = 1; - break; - default: - if (!isspace(c)) { - // since PHP5.3.4, self-closing tags are interpreted as normal tags, - // so normalized
=
; note that tags from $allow are not normalized - if (c != '/') { - norm.push_back(c); - } - if (state == 0) { - state = 1; - } - } else { - if (state == 1) { - done = 1; - } - } - break; - } - } - norm.push_back('>'); - return memmem(allow.c_str(), allow.size(), norm.c_str(), norm.size()) != nullptr; -} - -string f$strip_tags(const string &str, const string &allow) { - int br = 0, depth = 0, in_q = 0; - int state = 0; - - const string allow_low = f$strtolower(allow); - kphp_runtime_context.static_SB.clean(); - kphp_runtime_context.static_SB_spare.clean(); - char lc = 0; - int len = str.size(); - for (int i = 0; i < len; i++) { - char c = str[i]; - switch (c) { - case '\0': - break; - case '<': - if (!in_q) { - if (isspace(str[i + 1])) { - if (state == 0) { - kphp_runtime_context.static_SB << c; - } else if (state == 1) { - kphp_runtime_context.static_SB_spare << c; - } - } else if (state == 0) { - lc = '<'; - state = 1; - kphp_runtime_context.static_SB_spare << '<'; - } else if (state == 1) { - depth++; - } - } - break; - case '(': - if (state == 2) { - if (lc != '"' && lc != '\'') { - lc = '('; - br++; - } - } else if (state == 1) { - kphp_runtime_context.static_SB_spare << c; - } else if (state == 0) { - kphp_runtime_context.static_SB << c; - } - break; - case ')': - if (state == 2) { - if (lc != '"' && lc != '\'') { - lc = ')'; - br--; - } - } else if (state == 1) { - kphp_runtime_context.static_SB_spare << c; - } else if (state == 0) { - kphp_runtime_context.static_SB << c; - } - break; - case '>': - if (depth) { - depth--; - break; - } - - if (in_q) { - break; - } - - switch (state) { - case 1: /* HTML/XML */ - lc = '>'; - in_q = state = 0; - kphp_runtime_context.static_SB_spare << '>'; - if (php_tag_find(kphp_runtime_context.static_SB_spare.str(), allow_low)) { - kphp_runtime_context.static_SB << kphp_runtime_context.static_SB_spare.c_str(); - } - kphp_runtime_context.static_SB_spare.clean(); - break; - case 2: /* PHP */ - if (!br && lc != '\"' && str[i - 1] 
== '?') { - in_q = state = 0; - kphp_runtime_context.static_SB_spare.clean(); - } - break; - case 3: - in_q = state = 0; - kphp_runtime_context.static_SB_spare.clean(); - break; - case 4: /* JavaScript/CSS/etc... */ - if (i >= 2 && str[i - 1] == '-' && str[i - 2] == '-') { - in_q = state = 0; - kphp_runtime_context.static_SB_spare.clean(); - } - break; - default: - kphp_runtime_context.static_SB << c; - break; - } - break; - - case '"': - case '\'': - if (state == 4) { - /* Inside */ - break; - } else if (state == 2 && str[i - 1] != '\\') { - if (lc == c) { - lc = 0; - } else if (lc != '\\') { - lc = c; - } - } else if (state == 0) { - kphp_runtime_context.static_SB << c; - } else if (state == 1) { - kphp_runtime_context.static_SB_spare << c; - } - if (state && i > 0 && (state == 1 || str[i - 1] != '\\') && (!in_q || c == in_q)) { - if (in_q) { - in_q = 0; - } else { - in_q = c; - } - } - break; - case '!': - /* JavaScript & Other HTML scripting languages */ - if (state == 1 && str[i - 1] == '<') { - state = 3; - lc = c; - } else { - if (state == 0) { - kphp_runtime_context.static_SB << c; - } else if (state == 1) { - kphp_runtime_context.static_SB_spare << c; - } - } - break; - case '-': - if (state == 3 && i >= 2 && str[i - 1] == '-' && str[i - 2] == '!') { - state = 4; - } else { - if (state == 0) { - kphp_runtime_context.static_SB << c; - } else if (state == 1) { - kphp_runtime_context.static_SB_spare << c; - } - } - break; - case '?': - if (state == 1 && str[i - 1] == '<') { - br = 0; - state = 2; - break; - } - /* fall-through */ - case 'E': - case 'e': - /* !DOCTYPE exception */ - if (state == 3 && i > 6 - && tolower(str[i - 1]) == 'p' - && tolower(str[i - 2]) == 'y' - && tolower(str[i - 3]) == 't' - && tolower(str[i - 4]) == 'c' - && tolower(str[i - 5]) == 'o' - && tolower(str[i - 6]) == 'd') { - state = 1; - break; - } - /* fall-through */ - case 'l': - case 'L': - /* swm: If we encounter ' 2 && tolower(str[i - 1]) == 'm' && tolower(str[i - 2]) == 'x') { - 
state = 1; - break; - } - - /* fall-through */ - default: - if (state == 0) { - kphp_runtime_context.static_SB << c; - } else if (state == 1) { - kphp_runtime_context.static_SB_spare << c; - } - break; - } - } - - return kphp_runtime_context.static_SB.str(); -} - -template -string strip_tags_string(const array &list) { - string allow_str; - if (!list.empty()) { - allow_str.reserve_at_least(list.count() * strlen("
")); - for (const auto &it : list) { - const auto &s = it.get_value(); - if (!s.empty()) { - allow_str.push_back('<'); - allow_str.append(f$strval(s)); - allow_str.push_back('>'); - } - } + s = ""; } - return allow_str; -} - -string f$strip_tags(const string &str, const array &allow_list) { - php_assert(allow_list.empty()); - return f$strip_tags(str, string()); -} - -string f$strip_tags(const string &str, const array &allow_list) { - return f$strip_tags(str, strip_tags_string(allow_list)); -} - -string f$strip_tags(const string &str, const mixed &allow) { - if (!allow.is_array()) { - return f$strip_tags(str, allow.to_string()); - } - auto allow_list = allow.to_array(); - return f$strip_tags(str, strip_tags_string(allow_list)); -} - -Optional f$stristr(const string &haystack, const string &needle, bool before_needle) { - if ((int)needle.size() == 0) { - php_warning("Parameter needle is empty in function stristr"); - return false; - } - - const char *s = strcasestr(haystack.c_str(), needle.c_str()); - if (s == nullptr) { - return false; - } - - const auto pos = static_cast(s - haystack.c_str()); - if (before_needle) { - return haystack.substr(0, pos); - } - return haystack.substr(pos, haystack.size() - pos); -} - -Optional f$strrchr(const string &haystack, const string &needle) { - if (needle.empty()) { - php_warning("Parameter needle is empty in function strrchr"); - return false; - } - if (needle.size() > 1) { - php_warning("Parameter needle contains more than one character, only the first is used"); - } - const char needle_char = needle[0]; - for (string::size_type pos = haystack.size(); pos != 0; --pos) { - if (haystack[pos - 1] == needle_char) { - return haystack.substr(pos - 1, haystack.size() - pos + 1); - } - } - return false; -} - -int64_t f$strncmp(const string &lhs, const string &rhs, int64_t len) { - if (len < 0) { - return 0; - } - return memcmp(lhs.c_str(), rhs.c_str(), min(int64_t{min(lhs.size(), rhs.size())} + 1, len)); -} - -/* - Modified for PHP by 
Andrei Zmievski - Modified for KPHP by Niyaz Nigmatullin - - compare_right, compare_left and strnatcmp_ex functions - Copyright (C) 2000 by Martin Pool - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for any damages - arising from the use of this software. - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. -*/ - -static int64_t compare_right(char const **a, char const *aend, char const **b, char const *bend) { - int64_t bias = 0; - - /* The longest run of digits wins. That aside, the greatest - value wins, but we can't know that it will until we've scanned - both numbers to know that they have the same magnitude, so we - remember it in BIAS. */ - for (;; (*a)++, (*b)++) { - if ((*a == aend || !isdigit((int32_t)(unsigned char)**a)) && - (*b == bend || !isdigit((int32_t)(unsigned char)**b))) { - return bias; - } else if (*a == aend || !isdigit((int32_t)(unsigned char)**a)) { - return -1; - } else if (*b == bend || !isdigit((int32_t)(unsigned char)**b)) { - return +1; - } else if (**a < **b) { - if (!bias) { - bias = -1; - } - } else if (**a > **b) { - if (!bias) { - bias = +1; - } - } - } - - return 0; -} - -static int64_t compare_left(char const **a, char const *aend, char const **b, char const *bend) { - /* Compare two left-aligned numbers: the first to have a - different value wins. 
*/ - for (;; (*a)++, (*b)++) { - if ((*a == aend || !isdigit((int32_t)(unsigned char)**a)) && - (*b == bend || !isdigit((int32_t)(unsigned char)**b))) { - return 0; - } else if (*a == aend || !isdigit((int32_t)(unsigned char)**a)) { - return -1; - } else if (*b == bend || !isdigit((int32_t)(unsigned char)**b)) { - return +1; - } else if (**a < **b) { - return -1; - } else if (**a > **b) { - return +1; - } - } - - return 0; -} - -static int64_t strnatcmp_ex(char const *a, size_t a_len, char const *b, size_t b_len, int64_t fold_case) { - unsigned char ca, cb; - char const *ap, *bp; - char const *aend = a + a_len, - *bend = b + b_len; - bool fractional = false; - int64_t result = 0; - short leading = 1; - - if (a_len == 0 || b_len == 0) { - return (a_len == b_len ? 0 : (a_len > b_len ? 1 : -1)); - } - - ap = a; - bp = b; - while (true) { - ca = *ap; - cb = *bp; - - /* skip over leading zeros */ - while (leading && ca == '0' && (ap + 1 < aend) && isdigit((int32_t)(unsigned char)*(ap + 1))) { - ca = *++ap; - } - - while (leading && cb == '0' && (bp + 1 < bend) && isdigit((int32_t)(unsigned char)*(bp + 1))) { - cb = *++bp; - } - - leading = 0; - - /* Skip consecutive whitespace */ - while (isspace((int32_t)(unsigned char)ca)) { - ca = *++ap; - } - - while (isspace((int32_t)(unsigned char)cb)) { - cb = *++bp; - } - - /* process run of digits */ - if (isdigit((int32_t)(unsigned char)ca) && isdigit((int32_t)(unsigned char)cb)) { - fractional = (ca == '0' || cb == '0'); - - if (fractional) { - result = compare_left(&ap, aend, &bp, bend); - } else { - result = compare_right(&ap, aend, &bp, bend); - } - - if (result != 0) { - return result; - } - - if (ap == aend && bp == bend) { - /* End of the strings. Let caller sort them out. */ - return 0; - } else { - /* Keep on comparing from the current point. 
*/ - ca = *ap; - cb = *bp; - } - } - - if (fold_case) { - ca = static_cast(toupper(ca)); - cb = static_cast(toupper(cb)); - } - - if (ca < cb) { - return -1; - } else if (ca > cb) { - return +1; - } - - ++ap; - ++bp; - if (ap >= aend && bp >= bend) { - /* The strings compare the same. Perhaps the caller - will want to call strcmp to break the tie. */ - return 0; - } else if (ap >= aend) { - return -1; - } else if (bp >= bend) { - return 1; - } - } -} - - -int64_t f$strnatcmp(const string &lhs, const string &rhs) { - return strnatcmp_ex(lhs.c_str(), lhs.size(), rhs.c_str(), rhs.size(), 0); -} - -int64_t f$strspn(const string &hayshack, const string &char_list, int64_t offset) noexcept { - return strspn(hayshack.c_str() + hayshack.get_correct_offset_clamped(offset), char_list.c_str()); -} - -int64_t f$strcspn(const string &hayshack, const string &char_list, int64_t offset) noexcept { - return strcspn(hayshack.c_str() + hayshack.get_correct_offset_clamped(offset), char_list.c_str()); -} - -Optional f$strpbrk(const string &haystack, const string &char_list) { - const char *pos = strpbrk(haystack.c_str(), char_list.c_str()); - if (pos == nullptr) { - return false; - } - - return string(pos, static_cast(haystack.size() - (pos - haystack.c_str()))); -} - -Optional f$strpos(const string &haystack, const string &needle, int64_t offset) { - if (offset < 0) { - php_warning("Wrong offset = %" PRIi64 " in function strpos", offset); - return false; - } - if (offset > int64_t{haystack.size()}) { - return false; - } - if (needle.size() <= 1) { - if (needle.size() == 0) { - php_warning("Parameter needle is empty in function strpos"); - return false; - } - - const char *s = static_cast (memchr(haystack.c_str() + offset, needle[0], haystack.size() - offset)); - if (s == nullptr) { - return false; - } - return s - haystack.c_str(); - } - - const char *s = static_cast (memmem(haystack.c_str() + offset, haystack.size() - offset, needle.c_str(), needle.size())); - if (s == nullptr) { - 
return false; - } - return s - haystack.c_str(); -} - -Optional f$strrpos(const string &haystack, const string &needle, int64_t offset) { - const char *end = haystack.c_str() + haystack.size(); - if (offset < 0) { - offset += haystack.size() + 1; - if (offset < 0) { - return false; - } - - end = haystack.c_str() + offset; - offset = 0; - } - if (offset >= haystack.size()) { - return false; - } - if (needle.size() == 0) { - php_warning("Parameter needle is empty in function strrpos"); - return false; - } - - const char *s = static_cast (memmem(haystack.c_str() + offset, haystack.size() - offset, needle.c_str(), needle.size())), *t; - if (s == nullptr || s >= end) { - return false; - } - while ((t = static_cast (memmem(s + 1, haystack.c_str() + haystack.size() - s - 1, needle.c_str(), needle.size()))) != nullptr && t < end) { - s = t; - } - return s - haystack.c_str(); -} - -Optional f$strripos(const string &haystack, const string &needle, int64_t offset) { - const char *end = haystack.c_str() + haystack.size(); - if (offset < 0) { - offset += haystack.size() + 1; - if (offset < 0) { - return false; - } - - end = haystack.c_str() + offset; - offset = 0; - } - if (offset >= haystack.size()) { - return false; - } - if (needle.size() == 0) { - php_warning("Parameter needle is empty in function strripos"); - return false; - } - - const char *s = strcasestr(haystack.c_str() + offset, needle.c_str()), *t; - if (s == nullptr || s >= end) { - return false; - } - while ((t = strcasestr(s + 1, needle.c_str())) != nullptr && t < end) { - s = t; - } - return s - haystack.c_str(); -} - -string f$strrev(const string &str) { - int n = str.size(); - - string res(n, false); - for (int i = 0; i < n; i++) { - res[n - i - 1] = str[i]; - } - - return res; -} - -Optional f$strstr(const string &haystack, const string &needle, bool before_needle) { - if ((int)needle.size() == 0) { - php_warning("Parameter needle is empty in function strstr"); - return false; - } - - const char *s = 
static_cast (memmem(haystack.c_str(), haystack.size(), needle.c_str(), needle.size())); - if (s == nullptr) { - return false; - } - - const auto pos = static_cast(s - haystack.c_str()); - if (before_needle) { - return haystack.substr(0, pos); - } - return haystack.substr(pos, haystack.size() - pos); -} - -string f$strtolower(const string &str) { - int n = str.size(); - - // if there is no upper case char inside the string, we can - // return the argument unchanged, avoiding the allocation and data copying; - // while at it, memorize the first upper case char, so we can - // use memcpy to copy everything before that pos; - // note: do not use islower() here, the compiler does not inline that function call; - // it could be beneficial to use 256-byte LUT here, but SIMD approach could be even better - const char *end = str.c_str() + n; - const char *uppercase_pos = std::find_if(str.c_str(), end, [](unsigned char ch) { - return ch >= 'A' && ch <= 'Z'; - }); - if (uppercase_pos == end) { - return str; - } - - string res(n, false); - int64_t lowercase_prefix = uppercase_pos - str.c_str(); - if (lowercase_prefix != 0) { // avoid unnecessary function call - std::memcpy(res.buffer(), str.c_str(), lowercase_prefix); - } - for (int i = lowercase_prefix; i < n; i++) { - res[i] = static_cast(std::tolower(static_cast(str[i]))); - } - - return res; -} - -string f$strtoupper(const string &str) { - int n = str.size(); - - // same optimization as in strtolower - const char *end = str.c_str() + n; - const char *lowercase_pos = std::find_if(str.c_str(), end, [](unsigned char ch) { - return ch >= 'a' && ch <= 'z'; - }); - if (lowercase_pos == end) { - return str; - } - - string res(n, false); - int64_t uppercase_prefix = lowercase_pos - str.c_str(); - if (uppercase_prefix != 0) { // avoid unnecessary function call - std::memcpy(res.buffer(), str.c_str(), uppercase_prefix); - } - for (int i = uppercase_prefix; i < n; i++) { - res[i] = static_cast(std::toupper(static_cast(str[i]))); - } 
- - return res; -} - -string f$strtr(const string &subject, const string &from, const string &to) { - int n = subject.size(); - string result(n, false); - for (int i = 0; i < n; i++) { - const char *p = static_cast (memchr(static_cast (from.c_str()), (int)(unsigned char)subject[i], (size_t)from.size())); - if (p == nullptr || static_cast(p - from.c_str()) >= to.size()) { - result[i] = subject[i]; - } else { - result[i] = to[static_cast(p - from.c_str())]; - } - } - return result; -} - -string f$str_pad(const string &input, int64_t len, const string &pad_str, int64_t pad_type) { - string::size_type old_len = input.size(); - if (len <= old_len) { - return input; - } - if (len > string::max_size()) { - php_critical_error ("tried to allocate too big string of size %" PRIi64, len); - } - - const auto strlen = static_cast(len); - - string::size_type pad_left = 0; - string::size_type pad_right = 0; - if (pad_type == STR_PAD_RIGHT) { - pad_right = strlen - old_len; - } else if (pad_type == STR_PAD_LEFT) { - pad_left = strlen - old_len; - } else if (pad_type == STR_PAD_BOTH) { - pad_left = (strlen - old_len) / 2; - pad_right = (strlen - old_len + 1) / 2; - } else { - php_warning("Wrong parameter pad_type in function str_pad"); - return input; - } - - string::size_type pad_len = pad_str.size(); - if (pad_len == 0) { - php_warning("Wrong parameter pad_str (empty string) in function str_pad"); - return input; - } - - string res(strlen, false); - for (string::size_type i = 0; i < pad_left; i++) { - res[i] = pad_str[i % pad_len]; - } - memcpy(&res[pad_left], input.c_str(), old_len); - for (string::size_type i = 0; i < pad_right; i++) { - res[i + pad_left + old_len] = pad_str[i % pad_len]; - } - - return res; -} - -string f$str_repeat(const string &s, int64_t multiplier) { - const string::size_type len = s.size(); - if (multiplier <= 0 || len == 0) { - return {}; - } - - auto mult = static_cast(multiplier); - if (string::max_size() / len < mult) { - php_critical_error ("tried to 
allocate too big string of size %" PRIi64, multiplier * len); - } - - if (len == 1) { - return {mult, s[0]}; - } - - string result(mult * len, false); - if (len >= 5) { - while (mult--) { - memcpy(&result[mult * len], s.c_str(), len); - } - } else { - for (string::size_type i = 0; i < mult; i++) { - for (string::size_type j = 0; j < len; j++) { - result[i * len + j] = s[j]; - } - } - } - return result; -} - -static string str_replace_char(char c, const string &replace, const string &subject, int64_t &replace_count, bool with_case) { - int count = 0; - const char *piece = subject.c_str(); - const char *piece_end = subject.c_str() + subject.size(); - - string result; - if (!replace.empty()) { - result.reserve_at_least(subject.size()); - } - - while (true) { - const char *pos = nullptr; - if (with_case) { - pos = static_cast(memchr(piece, c, piece_end - piece)); - } else { - const char needle[] = {c, '\0'}; - pos = strcasestr(piece, needle); - } - - if (pos == nullptr) { - if (count == 0) { - return subject; - } - replace_count += count; - result.append(piece, static_cast(piece_end - piece)); - return result; - } - - ++count; - - result.append(piece, static_cast(pos - piece)); - result.append(replace); - - piece = pos + 1; - } - php_assert (0); // unreachable - return {}; -} - -static const char *find_substr(const char *where, const char *where_end, const string &what, bool with_case) { - if (with_case) { - return static_cast(memmem(where, where_end - where, what.c_str(), what.size())); - } - - return strcasestr(where, what.c_str()); -} - -void str_replace_inplace(const string &search, const string &replace, string &subject, int64_t &replace_count, bool with_case) { - if (search.empty()) { - php_warning("Parameter search is empty in function str_replace"); - return; - } - - subject.make_not_shared(); - - int count = 0; - const char *piece = subject.c_str(); - const char *piece_end = subject.c_str() + subject.size(); - - char *output = subject.buffer(); - bool 
length_no_change = search.size() == replace.size(); - - while (true) { - const char *pos = find_substr(piece, piece_end, search, with_case); - if (pos == nullptr) { - if (count == 0) { - return; - } - replace_count += count; - if (!length_no_change) { - memmove(output, piece, piece_end - piece); - } - output += piece_end - piece; - if (!length_no_change) { - subject.shrink(static_cast(output - subject.c_str())); - } - return; - } - - ++count; - - if (!length_no_change) { - memmove(output, piece, pos - piece); - } - output += pos - piece; - memcpy(output, replace.c_str(), replace.size()); - output += replace.size(); - - piece = pos + search.size(); - } - php_assert (0); // unreachable -} - -string str_replace(const string &search, const string &replace, const string &subject, int64_t &replace_count, bool with_case) { - if (search.empty()) { - php_warning("Parameter search is empty in function str_replace"); - return subject; - } - - int count = 0; - const char *piece = subject.c_str(); - const char *piece_end = subject.c_str() + subject.size(); - - string result; - while (true) { - const char *pos = find_substr(piece, piece_end, search, with_case); - if (pos == nullptr) { - if (count == 0) { - return subject; - } - replace_count += count; - result.append(piece, static_cast(piece_end - piece)); - return result; - } - - ++count; - - result.append(piece, static_cast(pos - piece)); - result.append(replace); - - piece = pos + search.size(); - } - php_assert (0); // unreachable - return {}; -} - -// common for f$str_replace(string) and f$str_ireplace(string) -string str_replace_gen(const string &search, const string &replace, const string &subject, int64_t &replace_count, bool with_case); - -string str_replace_string(const mixed &search, const mixed &replace, const string &subject, int64_t &replace_count, bool with_case) { - if (search.is_array() && replace.is_array()) { - return str_replace_string_array(search.as_array(""), replace.as_array(""), subject, replace_count, 
with_case); - } else if (search.is_array()) { - string result = subject; - const string &replace_value = replace.to_string(); - - for (array::const_iterator it = search.begin(); it != search.end(); ++it) { - const string &search_string = f$strval(it.get_value()); - if (search_string.size() >= replace_value.size()) { - str_replace_inplace(search_string, replace_value, result, replace_count, with_case); - } else { - result = str_replace(search_string, replace_value, result, replace_count, with_case); - } - } - return result; - } else { - if (replace.is_array()) { - php_warning("Parameter mismatch, search is a string while replace is an array"); - //return false; - } - - return str_replace_gen(f$strval(search), f$strval(replace), subject, replace_count, with_case); - } -} - -// common for f$str_replace(string) and f$str_ireplace(string) -string str_replace_gen(const string &search, const string &replace, const string &subject, int64_t &replace_count, bool with_case) { - replace_count = 0; - if (search.size() == 1) { - return str_replace_char(search[0], replace, subject, replace_count, with_case); - } else { - return str_replace(search, replace, subject, replace_count, with_case); - } -} - -string f$str_replace(const string &search, const string &replace, const string &subject, int64_t &replace_count) { - return str_replace_gen(search, replace, subject, replace_count, true); -} - -string f$str_ireplace(const string &search, const string &replace, const string &subject, int64_t &replace_count) { - return str_replace_gen(search, replace, subject, replace_count, false); -} - -string f$str_replace(const mixed &search, const mixed &replace, const string &subject, int64_t &replace_count) { - return str_replace_string(search, replace, subject, replace_count, true); -} - -string f$str_ireplace(const mixed &search, const mixed &replace, const string &subject, int64_t &replace_count) { - return str_replace_string(search, replace, subject, replace_count, false); -} - -// common 
for f$str_replace(mixed) and f$str_ireplace(mixed) -mixed str_replace_gen(const mixed &search, const mixed &replace, const mixed &subject, int64_t &replace_count, bool with_case) { - replace_count = 0; - if (subject.is_array()) { - array result; - for (array::const_iterator it = subject.begin(); it != subject.end(); ++it) { - mixed cur_result = str_replace_string(search, replace, it.get_value().to_string(), replace_count, with_case); - if (!cur_result.is_null()) { - result.set_value(it.get_key(), cur_result); - } - } - return result; - } else { - return str_replace_string(search, replace, subject.to_string(), replace_count, with_case); - } -} - -mixed f$str_replace(const mixed &search, const mixed &replace, const mixed &subject, int64_t &replace_count) { - return str_replace_gen(search, replace, subject, replace_count, true); -} - -mixed f$str_ireplace(const mixed &search, const mixed &replace, const mixed &subject, int64_t &replace_count) { - return str_replace_gen(search, replace, subject, replace_count, false); -} - -array f$str_split(const string &str, int64_t split_length) { - if (split_length <= 0) { - php_warning ("Wrong parameter split_length = %" PRIi64 " in function str_split", split_length); - array result(array_size(1, true)); - result.set_value(0, str); - return result; - } - - array result(array_size((str.size() + split_length - 1) / split_length, true)); - string::size_type i = 0; - for (i = 0; i + split_length <= str.size(); i += static_cast(split_length)) { - result.push_back(str.substr(i, static_cast(split_length))); - } - if (i < str.size()) { - result.push_back(str.substr(i, str.size() - i)); - } - return result; -} - -Optional f$substr(const string &str, int64_t start, int64_t length) { - if (!wrap_substr_args(str.size(), start, length)) { - return false; - } - return str.substr(static_cast(start), static_cast(length)); -} - -Optional f$substr(tmp_string str, int64_t start, int64_t length) { - if (!wrap_substr_args(str.size, start, length)) { - 
return false; - } - return string(str.data + start, length); -} - -tmp_string f$_tmp_substr(const string &str, int64_t start, int64_t length) { - if (!wrap_substr_args(str.size(), start, length)) { - return {}; - } - return {str.c_str() + start, static_cast(length)}; -} - -tmp_string f$_tmp_substr(tmp_string str, int64_t start, int64_t length) { - if (!wrap_substr_args(str.size, start, length)) { - return {}; - } - return {str.data + start, static_cast(length)}; -} - -int64_t f$substr_count(const string &haystack, const string &needle, int64_t offset, int64_t length) { - offset = haystack.get_correct_offset(offset); - if (offset >= haystack.size()) { - return 0; - } - if (length > haystack.size() - offset) { - length = haystack.size() - offset; - } - - int64_t ans = 0; - const char *s = haystack.c_str() + offset, *end = haystack.c_str() + offset + length; - if (needle.empty()) { - php_warning("Needle is empty in function substr_count"); - return end - s; - } - do { - s = static_cast (memmem(static_cast (s), (size_t)(end - s), static_cast (needle.c_str()), (size_t)needle.size())); - if (s == nullptr) { - return ans; - } - ans++; - s += needle.size(); - } while (true); -} - -string f$substr_replace(const string &str, const string &replacement, int64_t start, int64_t length) { - int64_t str_len = str.size(); - - // if $start is negative, count $start from the end of the string - start = str.get_correct_offset_clamped(start); - - // if $length is negative, set it to the length needed - // needed to stop that many chars from the end of the string - if (length < 0) { - length = (str_len - start) + length; - if (length < 0) { - length = 0; - } - } - - if (length > str_len) { - length = str_len; - } - if ((start + length) > str_len) { - length = str_len - start; - } - - auto result = str.substr(0, static_cast(start)); - result.append(replacement); - const auto offset = static_cast(start + length); - result.append(str.substr(offset, str.size() - offset)); - return result; 
-} - -Optional f$substr_compare(const string &main_str, const string &str, int64_t offset, int64_t length, bool case_insensitivity) { - int64_t str_len = main_str.size(); - - if (length < 0) { - php_warning("The length must be greater than or equal to zero in substr_compare function call"); - return false; - } - - offset = main_str.get_correct_offset(offset); - - // > and >= signs depend on version of PHP7.2 and could vary unpredictably. We put `>` sign which corresponds to behaviour of PHP7.2.22 - if (offset > str_len) { - php_warning("The start position cannot exceed initial string length in substr_compare function call"); - return false; - } - - if (case_insensitivity) { - return strncasecmp(main_str.c_str() + offset, str.c_str(), length); - } else { - return strncmp(main_str.c_str() + offset, str.c_str(), length); - } -} - -bool f$str_starts_with(const string &haystack, const string &needle) { - return haystack.starts_with(needle); -} - -bool f$str_ends_with(const string &haystack, const string &needle) { - return haystack.ends_with(needle); -} - -tmp_string trim_impl(const char *s, string::size_type s_len, const string &what) { - const char *mask = get_mask(what); - - int len = s_len; - if (len == 0 || (!mask[(unsigned char)s[len - 1]] && !mask[(unsigned char)s[0]])) { - return {s, s_len}; - } - - while (len > 0 && mask[(unsigned char)s[len - 1]]) { - len--; - } - - if (len == 0) { - return {}; - } - - int l = 0; - while (mask[(unsigned char)s[l]]) { - l++; - } - return {s + l, static_cast(len - l)}; -} - -tmp_string f$_tmp_trim(tmp_string s, const string &what) { - return trim_impl(s.data, s.size, what); -} - -tmp_string f$_tmp_trim(const string &s, const string &what) { - return trim_impl(s.c_str(), s.size(), what); -} - -string f$trim(tmp_string s, const string &what) { - return materialize_tmp_string(trim_impl(s.data, s.size, what)); -} - -string f$trim(const string &s, const string &what) { - tmp_string result = trim_impl(s.c_str(), s.size(), what); - if 
(result.data == s.c_str() && result.size == s.size()) { - return s; - } - return materialize_tmp_string(result); -} - -string f$ucfirst(const string &str) { - int n = str.size(); - if (n == 0) { - return str; - } - - string res(n, false); - res[0] = (char)toupper(str[0]); - memcpy(&res[1], &str[1], n - 1); - - return res; -} - -string f$ucwords(const string &str) { - int n = str.size(); - - bool in_word = false; - string res(n, false); - for (int i = 0; i < n; i++) { - int cur = str[i] & 0xdf; - if ('A' <= cur && cur <= 'Z') { - if (in_word) { - res[i] = str[i]; - } else { - res[i] = (char)cur; - in_word = true; - } - } else { - res[i] = str[i]; - in_word = false; - } - } - - return res; -} - -Optional> f$unpack(const string &pattern, const string &data) { - array result; - - int data_len = data.size(), data_pos = 0; - for (int i = 0; i < (int)pattern.size();) { - char format = pattern[i++]; - int cnt = -1; - if ('0' <= pattern[i] && pattern[i] <= '9') { - cnt = 0; - do { - cnt = cnt * 10 + pattern[i++] - '0'; - } while ('0' <= pattern[i] && pattern[i] <= '9'); - - if (cnt <= 0) { - php_warning("Wrong count specifier in pattern \"%s\"", pattern.c_str()); - return false; - } - } else if (pattern[i] == '*') { - cnt = 0; - i++; - } - if (data_pos >= data_len) { - if (format == 'A' || format == 'a' || format == 'H' || format == 'h' || cnt != 0) { - php_warning("Not enough data to unpack with format \"%s\"", pattern.c_str()); - return false; - } - return result; - } - - const char *key_end = strchrnul(&pattern[i], '/'); - string key_prefix(pattern.c_str() + i, static_cast(key_end - pattern.c_str() - i)); - i = (int)(key_end - pattern.c_str()); - if (i < (int)pattern.size()) { - i++; - } - - if (cnt == 0 && i != (int)pattern.size()) { - php_warning("Misplaced symbol '*' in pattern \"%s\"", pattern.c_str()); - return false; - } - - char filler = 0; - switch (format) { - case 'A': - filler = ' '; - /* fallthrough */ - case 'a': { - if (cnt == 0) { - cnt = data_len - 
data_pos; - } else if (cnt == -1) { - cnt = 1; - } - int read_len = cnt; - if (read_len + data_pos > data_len) { - php_warning("Not enough data to unpack with format \"%s\"", pattern.c_str()); - return false; - } - while (cnt > 0 && data[data_pos + cnt - 1] == filler) { - cnt--; - } - - if (key_prefix.empty()) { - key_prefix = ONE; - } - - result.set_value(key_prefix, string(data.c_str() + data_pos, cnt)); - - data_pos += read_len; - break; - } - case 'h': - case 'H': { - if (cnt == 0) { - cnt = (data_len - data_pos) * 2; - } else if (cnt == -1) { - cnt = 1; - } - - int read_len = (cnt + 1) / 2; - if (read_len + data_pos > data_len) { - php_warning("Not enough data to unpack with format \"%s\"", pattern.c_str()); - return false; - } - - string value(cnt, false); - for (int j = data_pos; cnt > 0; j++, cnt -= 2) { - unsigned char ch = data[j]; - char num_high = lhex_digits[ch >> 4]; - char num_low = lhex_digits[ch & 15]; - if (format == 'h') { - swap(num_high, num_low); - } - - value[(j - data_pos) * 2] = num_high; - if (cnt > 1) { - value[(j - data_pos) * 2 + 1] = num_low; - } - } - php_assert (cnt == 0 || cnt == -1); - - if (key_prefix.empty()) { - key_prefix = ONE; - } - - result.set_value(key_prefix, value); - - data_pos += read_len; - break; - } - - default: { - if (key_prefix.empty() && cnt == -1) { - key_prefix = ONE; - } - int counter = 1; - do { - mixed value; - int value_int; - if (data_pos >= data_len) { - php_warning("Not enough data to unpack with format \"%s\"", pattern.c_str()); - return false; - } - - switch (format) { - case 'c': - case 'C': - value_int = (int)data[data_pos++]; - if (format != 'c' && value_int < 0) { - value_int += 256; - } - value = value_int; - break; - case 's': - case 'S': - case 'v': - value_int = (unsigned char)data[data_pos]; - if (data_pos + 1 < data_len) { - value_int |= data[data_pos + 1] << 8; - } - data_pos += 2; - if (format != 's' && value_int < 0) { - value_int += 65536; - } - value = value_int; - break; - case 'n': - 
value_int = (unsigned char)data[data_pos] << 8; - if (data_pos + 1 < data_len) { - value_int |= (unsigned char)data[data_pos + 1]; - } - data_pos += 2; - value = value_int; - break; - case 'i': - case 'I': - case 'l': - case 'L': - case 'V': - value_int = (unsigned char)data[data_pos]; - if (data_pos + 1 < data_len) { - value_int |= (unsigned char)data[data_pos + 1] << 8; - if (data_pos + 2 < data_len) { - value_int |= (unsigned char)data[data_pos + 2] << 16; - if (data_pos + 3 < data_len) { - value_int |= data[data_pos + 3] << 24; - } - } - } - data_pos += 4; - value = value_int; - break; - case 'N': - value_int = (unsigned char)data[data_pos] << 24; - if (data_pos + 1 < data_len) { - value_int |= (unsigned char)data[data_pos + 1] << 16; - if (data_pos + 2 < data_len) { - value_int |= (unsigned char)data[data_pos + 2] << 8; - if (data_pos + 3 < data_len) { - value_int |= (unsigned char)data[data_pos + 3]; - } - } - } - data_pos += 4; - value = value_int; - break; - case 'f': { - if (data_pos + (int)sizeof(float) > data_len) { - php_warning("Not enough data to unpack with format \"%s\"", pattern.c_str()); - return false; - } - value = (double)*(float *)(data.c_str() + data_pos); - data_pos += (int)sizeof(float); - break; - } - case 'e': - case 'E': - case 'd': { - if (data_pos + (int)sizeof(double) > data_len) { - php_warning("Not enough data to unpack with format \"%s\"", pattern.c_str()); - return false; - } - uint64_t value_byteordered = 0; - memcpy(&value_byteordered, data.c_str() + data_pos, sizeof(double)); - if (format == 'e') { - value_byteordered = le64toh(value_byteordered); - } else if (format == 'E') { - value_byteordered = be64toh(value_byteordered); - } - value = float64_from_bits(value_byteordered); - data_pos += (int)sizeof(double); - break; - } - case 'J': - case 'P': - case 'Q': { - if (data_pos + (int)sizeof(unsigned long long) > data_len) { - php_warning("Not enough data to unpack with format \"%s\"", pattern.c_str()); - return false; - } - - // 
stored in the host machine order by the default (Q flag) - unsigned long long value_byteordered = 0; - memcpy(&value_byteordered, data.c_str() + data_pos, sizeof(value_byteordered)); - if (format == 'P') { - // for P encode in little endian order - value_byteordered = le64toh(value_byteordered); - } else if (format == 'J') { - // for J encode in big endian order - value_byteordered = be64toh(value_byteordered); - } - - const size_t buf_size = 20; - char buf[buf_size]; - value = string{buf, static_cast(simd_uint64_to_string(value_byteordered, buf) - buf)}; - data_pos += (int)sizeof(unsigned long long); - break; - } - case 'q': { - if (data_pos + (int)sizeof(long long) > data_len) { - php_warning("Not enough data to unpack with format \"%s\"", pattern.c_str()); - return false; - } - long long value_ll = *reinterpret_cast(data.c_str() + data_pos); - value = f$strval(static_cast(value_ll)); - data_pos += (int)sizeof(long long); - break; - } - default: - php_warning("Format code \"%c\" not supported", format); - return false; - } - - string key = key_prefix; - if (cnt != -1) { - key.append(string(counter++)); - } - - result.set_value(key, value); - - if (cnt == 0) { - if (data_pos >= data_len) { - return result; - } - } - } while (cnt == 0 || --cnt > 0); - } - } - } - return result; -} - -int64_t f$vprintf(const string &format, const array &args) { - return f$printf(format, args); -} - -string f$vsprintf(const string &format, const array &args) { - return f$sprintf(format, args); -} - -string f$wordwrap(const string &str, int64_t width, const string &brk, bool cut) { - if (width <= 0) { - php_warning("Wrong parameter width = %" PRIi64 " in function wordwrap", width); - return str; - } - - string result; - string::size_type first = 0; - const string::size_type n = str.size(); - int64_t last_space = -1; - for (string::size_type i = 0; i < n; i++) { - if (str[i] == ' ') { - last_space = i; - } - if (i >= first + width && (cut || last_space > first)) { - if (last_space <= 
first) { - result.append(str, first, i - first); - first = i; - } else { - result.append(str, first, static_cast(last_space) - first); - first = static_cast(last_space + 1); - } - result.append(brk); - } - } - result.append(str, first, str.size() - first); - return result; -} - -string f$xor_strings(const string &s, const string &t) { - string::size_type length = min(s.size(), t.size()); - string result{length, false}; - const char *s_str = s.c_str(); - const char *t_str = t.c_str(); - char *res_str = result.buffer(); - for (string::size_type i = 0; i < length; i++) { - *res_str = *s_str ^ *t_str; - ++s_str; - ++t_str; - ++res_str; - } - return result; -} - -namespace impl_ { -// Based on the original PHP implementation -// https://github.com/php/php-src/blob/e8678fcb42c5cb1ea38ff9c6819baca74c2bb5ea/ext/standard/string.c#L3375-L3418 -inline size_t php_similar_str(vk::string_view first, vk::string_view second, size_t &pos1, size_t &pos2, size_t &count) { - size_t max = 0; - count = 0; - for (const char *p = first.begin(); p != first.end(); ++p) { - for (const char *q = second.begin(); q != second.end(); ++q) { - size_t l = 0; - for (; (p + l < first.end()) && (q + l < second.end()) && (p[l] == q[l]); ++l) { - } - if (l > max) { - max = l; - ++count; - pos1 = p - first.begin(); - pos2 = q - second.begin(); - } - } - } - return max; -} - -size_t php_similar_char(vk::string_view first, vk::string_view second) { - size_t pos1 = 0; - size_t pos2 = 0; - size_t count = 0; - - const size_t max = php_similar_str(first, second, pos1, pos2, count); - size_t sum = max; - if (sum) { - if (pos1 && pos2 && count > 1) { - sum += php_similar_char(first.substr(0, pos1), second.substr(0, pos2)); - } - pos1 += max; - pos2 += max; - if (pos1 < first.size() && pos2 < second.size()) { - sum += php_similar_char(first.substr(pos1), second.substr(pos2)); - } - } - return sum; -} - -double default_similar_text_percent_stub{0.0}; -} // namespace impl_ - -int64_t f$similar_text(const string 
&first, const string &second, double &percent) { - if (first.empty() && second.empty()) { - percent = 0.0; - return 0; - } - const size_t sim = impl_::php_similar_char(vk::string_view{first.c_str(), first.size()}, vk::string_view{second.c_str(), second.size()}); - percent = static_cast(sim) * 200.0 / (first.size() + second.size()); - return static_cast(sim); -} - -string str_concat(const string &s1, const string &s2) { - // for 2 argument concatenation it's not so uncommon to have at least one empty string argument; - // it happens in cases like `$prefix . $s` where $prefix could be empty depending on some condition - // real-world applications analysis shows that ~17.6% of all two arguments concatenations have - // at least one empty string argument - // - // checking both lengths for 0 is almost free, but when we step into those 17.6%, we get almost x10 - // faster concatenation and no heap allocations - // - // this idea is borrowed from the Go runtime - if (s1.empty()) { - return s2; - } - if (s2.empty()) { - return s1; - } - auto new_size = s1.size() + s2.size(); - return string(new_size, true).append_unsafe(s1).append_unsafe(s2).finish_append(); -} - -string str_concat(str_concat_arg s1, str_concat_arg s2) { - auto new_size = s1.size + s2.size; - return string(new_size, true).append_unsafe(s1.as_tmp_string()).append_unsafe(s2.as_tmp_string()).finish_append(); -} - -string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3) { - auto new_size = s1.size + s2.size + s3.size; - return string(new_size, true).append_unsafe(s1.as_tmp_string()).append_unsafe(s2.as_tmp_string()).append_unsafe(s3.as_tmp_string()).finish_append(); -} - -string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3, str_concat_arg s4) { - auto new_size = s1.size + s2.size + s3.size + s4.size; - return string(new_size, 
true).append_unsafe(s1.as_tmp_string()).append_unsafe(s2.as_tmp_string()).append_unsafe(s3.as_tmp_string()).append_unsafe(s4.as_tmp_string()).finish_append(); -} - -string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3, str_concat_arg s4, str_concat_arg s5) { - auto new_size = s1.size + s2.size + s3.size + s4.size + s5.size; - return string(new_size, true).append_unsafe(s1.as_tmp_string()).append_unsafe(s2.as_tmp_string()).append_unsafe(s3.as_tmp_string()).append_unsafe(s4.as_tmp_string()).append_unsafe(s5.as_tmp_string()).finish_append(); + return string(s); } // Based on `getcsv` from `streams` -Optional> f$str_getcsv(const string &str, const string &delimiter, const string &enclosure, const string &escape) { +Optional> f$str_getcsv(const string &str, const string &delimiter, const string &enclosure, const string &escape) noexcept { char delimiter_char = ','; char enclosure_char = '"'; char escape_char = PHP_CSV_NO_ESCAPE; diff --git a/runtime/string_functions.h b/runtime/string_functions.h index 6e8b75b11f..7d74706256 100644 --- a/runtime/string_functions.h +++ b/runtime/string_functions.h @@ -1,470 +1,22 @@ // Compiler for PHP (aka KPHP) -// Copyright (c) 2020 LLC «V Kontakte» +// Copyright (c) 2024 LLC «V Kontakte» // Distributed under the GPL v3 License, see LICENSE.notice.txt #pragma once -#include "runtime-common/core/runtime-core.h" -#include - -extern const string COLON; -extern const string CP1251; -extern const string DOT; -extern const string NEW_LINE; -extern const string SPACE; -extern const string WHAT; - -constexpr int32_t PHP_BUF_LEN = (1 << 23);//TODO remove usages of static buffer -extern char php_buf[PHP_BUF_LEN + 1]; - -extern const char lhex_digits[17]; -extern const char uhex_digits[17]; - -extern int64_t str_replace_count_dummy; - -inline uint8_t hex_to_int(char c) noexcept; - - -string f$addcslashes(const string &str, const string &what); - -string f$addslashes(const string &str); - -string f$bin2hex(const string &str); 
- -string f$chop(const string &s, const string &what = WHAT); - -string f$chr(int64_t v); - -string f$convert_cyr_string(const string &str, const string &from_s, const string &to_s); - -mixed f$count_chars(const string &str, int64_t mode = 0); - -string f$hex2bin(const string &str); - -constexpr int64_t ENT_HTML401 = 0; -constexpr int64_t ENT_COMPAT = 0; -constexpr int64_t ENT_QUOTES = 1; -constexpr int64_t ENT_NOQUOTES = 2; - -string f$htmlentities(const string &str); - -string f$html_entity_decode(const string &str, int64_t flags = ENT_COMPAT | ENT_HTML401, const string &encoding = CP1251); - -string f$htmlspecialchars(const string &str, int64_t flags = ENT_COMPAT | ENT_HTML401); - -string f$htmlspecialchars_decode(const string &str, int64_t flags = ENT_COMPAT | ENT_HTML401); - -string f$lcfirst(const string &str); - -int64_t f$levenshtein(const string &str1, const string &str2); - -string f$ltrim(const string &s, const string &what = WHAT); - -string f$mysql_escape_string(const string &str); - -string f$nl2br(const string &str, bool is_xhtml = true); - -inline string f$number_format(double number, int64_t decimals = 0); - -inline string f$number_format(double number, int64_t decimals, const string &dec_point); - -inline string f$number_format(double number, int64_t decimals, const mixed &dec_point); - -string f$number_format(double number, int64_t decimals, const string &dec_point, const string &thousands_sep); - -inline string f$number_format(double number, int64_t decimals, const string &dec_point, const mixed &thousands_sep); - -inline string f$number_format(double number, int64_t decimals, const mixed &dec_point, const string &thousands_sep); - -inline string f$number_format(double number, int64_t decimals, const mixed &dec_point, const mixed &thousands_sep); - -int64_t f$ord(const string &s); - -string f$pack(const string &pattern, const array &a); - -string f$prepare_search_query(const string &query); - -int64_t f$printf(const string &format, const array 
&a); - -string f$rtrim(const string &s, const string &what = WHAT); - -Optional f$setlocale(int64_t category, const string &locale); - -string f$sprintf(const string &format, const array &a); - -string f$stripcslashes(const string &str); - -string f$stripslashes(const string &str); - -int64_t f$strcasecmp(const string &lhs, const string &rhs); - -int64_t f$strcmp(const string &lhs, const string &rhs); - -string f$strip_tags(const string &str, const array &allow); -string f$strip_tags(const string &str, const mixed &allow); -string f$strip_tags(const string &str, const array &allow_list); -string f$strip_tags(const string &str, const string &allow = string()); - -Optional f$stripos(const string &haystack, const string &needle, int64_t offset = 0); - -inline Optional f$stripos(const string &haystack, const mixed &needle, int64_t offset = 0); - -Optional f$stristr(const string &haystack, const string &needle, bool before_needle = false); - -inline Optional f$stristr(const string &haystack, const mixed &needle, bool before_needle = false); - -Optional f$strrchr(const string &haystack, const string &needle); - -inline int64_t f$strlen(const string &s); - -int64_t f$strncmp(const string &lhs, const string &rhs, int64_t len); - -int64_t f$strnatcmp(const string &lhs, const string &rhs); - -int64_t f$strspn(const string &hayshack, const string &char_list, int64_t offset = 0) noexcept; - -int64_t f$strcspn(const string &hayshack, const string &char_list, int64_t offset = 0) noexcept; - -Optional f$strpbrk(const string &haystack, const string &char_list); - -Optional f$strpos(const string &haystack, const string &needle, int64_t offset = 0); - -inline Optional f$strpos(const string &haystack, const mixed &needle, int64_t offset = 0); - -template -inline Optional f$strpos(const string &haystack, const Optional &needle, int64_t offset = 0); - -Optional f$strrpos(const string &haystack, const string &needle, int64_t offset = 0); - -inline Optional f$strrpos(const string 
&haystack, const mixed &needle, int64_t offset = 0); - -Optional f$strripos(const string &haystack, const string &needle, int64_t offset = 0); - -inline Optional f$strripos(const string &haystack, const mixed &needle, int64_t offset = 0); - -string f$strrev(const string &str); - -Optional f$strstr(const string &haystack, const string &needle, bool before_needle = false); - -inline Optional f$strstr(const string &haystack, const mixed &needle, bool before_needle = false); +#include -string f$strtolower(const string &str); - -string f$strtoupper(const string &str); - -string f$strtr(const string &subject, const string &from, const string &to); - -template -string f$strtr(const string &subject, const array &replace_pairs); - -inline string f$strtr(const string &subject, const mixed &from, const mixed &to); - -inline string f$strtr(const string &subject, const mixed &replace_pairs); - -const int64_t STR_PAD_LEFT = 0; -const int64_t STR_PAD_RIGHT = 1; -const int64_t STR_PAD_BOTH = 2; - -string f$str_pad(const string &input, int64_t len, const string &pad_str = SPACE, int64_t pad_type = STR_PAD_RIGHT); - -string f$str_repeat(const string &s, int64_t multiplier); - -string f$str_replace(const string &search, const string &replace, const string &subject, int64_t &replace_count = str_replace_count_dummy); -string f$str_ireplace(const string &search, const string &replace, const string &subject, int64_t &replace_count = str_replace_count_dummy); - -void str_replace_inplace(const string &search, const string &replace, string &subject, int64_t &replace_count, bool with_case); -string str_replace(const string &search, const string &replace, const string &subject, int64_t &replace_count, bool with_case); - -template -string str_replace_string_array(const array &search, const array &replace, const string &subject, int64_t &replace_count, bool with_case) { - string result = subject; - - string replace_value; - typename array::const_iterator cur_replace_val; - cur_replace_val = 
replace.begin(); - - for (typename array::const_iterator it = search.begin(); it != search.end(); ++it) { - if (cur_replace_val != replace.end()) { - replace_value = f$strval(cur_replace_val.get_value()); - ++cur_replace_val; - } else { - replace_value = string(); - } - - const string &search_string = f$strval(it.get_value()); - if (search_string.size() >= replace_value.size()) { - str_replace_inplace(search_string, replace_value, result, replace_count, with_case); - } else { - result = str_replace(search_string, replace_value, result, replace_count, with_case); - } - } - - return result; -}; - -template -string f$str_replace(const array &search, const array &replace, const string &subject, int64_t &replace_count = str_replace_count_dummy) { - replace_count = 0; - return str_replace_string_array(search, replace, subject, replace_count, true); -} -template -string f$str_ireplace(const array &search, const array &replace, const string &subject, int64_t &replace_count = str_replace_count_dummy) { - replace_count = 0; - return str_replace_string_array(search, replace, subject, replace_count, false); -} - -string f$str_replace(const mixed &search, const mixed &replace, const string &subject, int64_t &replace_count = str_replace_count_dummy); -string f$str_ireplace(const mixed &search, const mixed &replace, const string &subject, int64_t &replace_count = str_replace_count_dummy); - -template> -SubjectT f$str_replace(const T1 &search, const T2 &replace, const SubjectT &subject, int64_t &replace_count = str_replace_count_dummy) { - return f$str_replace(search, replace, subject.val(), replace_count); -} -template> -SubjectT f$str_ireplace(const T1 &search, const T2 &replace, const SubjectT &subject, int64_t &replace_count = str_replace_count_dummy) { - return f$str_ireplace(search, replace, subject.val(), replace_count); -} - -mixed f$str_replace(const mixed &search, const mixed &replace, const mixed &subject, int64_t &replace_count = str_replace_count_dummy); -mixed 
f$str_ireplace(const mixed &search, const mixed &replace, const mixed &subject, int64_t &replace_count = str_replace_count_dummy); - -array f$str_split(const string &str, int64_t split_length = 1); - -Optional f$substr(const string &str, int64_t start, int64_t length = std::numeric_limits::max()); -Optional f$substr(tmp_string, int64_t start, int64_t length = std::numeric_limits::max()); - -tmp_string f$_tmp_substr(const string &str, int64_t start, int64_t length = std::numeric_limits::max()); -tmp_string f$_tmp_substr(tmp_string str, int64_t start, int64_t length = std::numeric_limits::max()); - -int64_t f$substr_count(const string &haystack, const string &needle, int64_t offset = 0, int64_t length = std::numeric_limits::max()); - -string f$substr_replace(const string &str, const string &replacement, int64_t start, int64_t length = std::numeric_limits::max()); - -Optional f$substr_compare(const string &main_str, const string &str, int64_t offset, int64_t length = std::numeric_limits::max(), bool case_insensitivity = false); - -bool f$str_starts_with(const string &haystack, const string &needle); - -bool f$str_ends_with(const string &haystack, const string &needle); - -tmp_string f$_tmp_trim(tmp_string s, const string &what = WHAT); -tmp_string f$_tmp_trim(const string &s, const string &what = WHAT); -string f$trim(tmp_string s, const string &what = WHAT); -string f$trim(const string &s, const string &what = WHAT); - -string f$ucfirst(const string &str); - -string f$ucwords(const string &str); - -Optional> f$unpack(const string &pattern, const string &data); - -int64_t f$vprintf(const string &format, const array &args); - -string f$vsprintf(const string &format, const array &args); - -string f$wordwrap(const string &str, int64_t width = 75, const string &brk = NEW_LINE, bool cut = false); - -Optional> f$str_getcsv(const string &s, const string &delimiter = string(1, ','), - const string &enclosure = string(1, '\"'), const string &escape = string(1, '\\')); - -/* - 
* - * IMPLEMENTATION - * - */ - -namespace impl_ { - -struct Hex2CharMapMaker { -private: - static constexpr uint8_t hex2int_char(size_t c) noexcept { - return ('0' <= c && c <= '9') ? static_cast(c - '0') : - ('a' <= c && c <= 'f') ? static_cast(c - 'a' + 10) : - ('A' <= c && c <= 'F') ? static_cast(c - 'A' + 10) : 16; - } - -public: - template - static constexpr auto make(std::index_sequence) noexcept { - return std::array{ - { - hex2int_char(Ints)..., - }}; - } -}; - -} // namepsace impl_ - -uint8_t hex_to_int(char c) noexcept { - static constexpr auto hex_int_map = impl_::Hex2CharMapMaker::make(std::make_index_sequence<256>()); - return hex_int_map[static_cast(c)]; -} - -string f$number_format(double number, int64_t decimals) { - return f$number_format(number, decimals, DOT, COLON); -} - -string f$number_format(double number, int64_t decimals, const string &dec_point) { - return f$number_format(number, decimals, dec_point, COLON); -} - -string f$number_format(double number, int64_t decimals, const mixed &dec_point) { - return f$number_format(number, decimals, dec_point.is_null() ? DOT : dec_point.to_string(), COLON); -} - -string f$number_format(double number, int64_t decimals, const string &dec_point, const mixed &thousands_sep) { - return f$number_format(number, decimals, dec_point, thousands_sep.is_null() ? COLON : thousands_sep.to_string()); -} - -string f$number_format(double number, int64_t decimals, const mixed &dec_point, const string &thousands_sep) { - return f$number_format(number, decimals, dec_point.is_null() ? DOT : dec_point.to_string(), thousands_sep); -} - -string f$number_format(double number, int64_t decimals, const mixed &dec_point, const mixed &thousands_sep) { - return f$number_format(number, decimals, dec_point.is_null() ? DOT : dec_point.to_string(), thousands_sep.is_null() ? 
COLON : thousands_sep.to_string()); -} - -int64_t f$strlen(const string &s) { - return s.size(); -} - -Optional f$stripos(const string &haystack, const mixed &needle, int64_t offset) { - if (needle.is_string()) { - return f$stripos(haystack, needle.to_string(), offset); - } else { - return f$stripos(haystack, string(1, (char)needle.to_int()), offset); - } -} - -Optional f$stristr(const string &haystack, const mixed &needle, bool before_needle) { - if (needle.is_string()) { - return f$stristr(haystack, needle.to_string(), before_needle); - } else { - return f$stristr(haystack, string(1, (char)needle.to_int()), before_needle); - } -} - -template -inline Optional f$strpos(const string &haystack, const Optional &needle, int64_t offset) { - return f$strpos(haystack, needle.val(), offset); -} - -Optional f$strpos(const string &haystack, const mixed &needle, int64_t offset) { - if (needle.is_string()) { - return f$strpos(haystack, needle.to_string(), offset); - } else { - return f$strpos(haystack, string(1, (char)needle.to_int()), offset); - } -} - -Optional f$strrpos(const string &haystack, const mixed &needle, int64_t offset) { - if (needle.is_string()) { - return f$strrpos(haystack, needle.to_string(), offset); - } else { - return f$strrpos(haystack, string(1, (char)needle.to_int()), offset); - } -} - -Optional f$strripos(const string &haystack, const mixed &needle, int64_t offset) { - if (needle.is_string()) { - return f$strripos(haystack, needle.to_string(), offset); - } else { - return f$strripos(haystack, string(1, (char)needle.to_int()), offset); - } -} - -Optional f$strstr(const string &haystack, const mixed &needle, bool before_needle) { - if (needle.is_string()) { - return f$strstr(haystack, needle.to_string(), before_needle); - } else { - return f$strstr(haystack, string(1, (char)needle.to_int()), before_needle); - } -} - -template -string f$strtr(const string &subject, const array &replace_pairs) { - const char *piece = subject.c_str(), *piece_end = 
subject.c_str() + subject.size(); - string result; - while (1) { - const char *best_pos = nullptr; - int64_t best_len = -1; - string replace; - for (typename array::const_iterator p = replace_pairs.begin(); p != replace_pairs.end(); ++p) { - const string search = f$strval(p.get_key()); - int64_t search_len = search.size(); - if (search_len == 0) { - return subject; - } - const char *pos = static_cast (memmem(static_cast (piece), (size_t)(piece_end - piece), static_cast (search.c_str()), (size_t)search_len)); - if (pos != nullptr && (best_pos == nullptr || best_pos > pos || (best_pos == pos && search_len > best_len))) { - best_pos = pos; - best_len = search_len; - replace = f$strval(p.get_value()); - } - } - if (best_pos == nullptr) { - result.append(piece, static_cast(piece_end - piece)); - break; - } - - result.append(piece, static_cast(best_pos - piece)); - result.append(replace); - - piece = best_pos + best_len; - } - - return result; -} - -inline string f$strtr(const string &subject, const mixed &from, const mixed &to) { - return f$strtr(subject, from.to_string(), to.to_string()); -} - -inline string f$strtr(const string &subject, const mixed &replace_pairs) { - return f$strtr(subject, replace_pairs.as_array("strtr")); -} - -string f$xor_strings(const string &s, const string &t); - -namespace impl_ { -extern double default_similar_text_percent_stub; -} // namespace impl_ -int64_t f$similar_text(const string &first, const string &second, double &percent = impl_::default_similar_text_percent_stub); +#include "runtime-common/core/runtime-core.h" +#include "runtime-common/stdlib/string/string-context.h" -// similar_text ( string $first , string $second [, float &$percent ] ) : int +int64_t f$printf(const string &format, const array &a) noexcept; -// str_concat_arg generalizes both tmp_string and string arguments; -// it can be constructed from both of them, so concat functions can operate -// on both tmp_string and string types -// there is a special (string, 
string) overloading for concat2 to -// allow the empty string result optimization to kick in -struct str_concat_arg { - const char *data; - string::size_type size; +int64_t f$vprintf(const string &format, const array &args) noexcept; - str_concat_arg(const string &s) : data{s.c_str()}, size{s.size()} {} - str_concat_arg(tmp_string s) : data{s.data}, size{s.size} {} +Optional f$setlocale(int64_t category, const string &locale) noexcept; - tmp_string as_tmp_string() const noexcept { - return {data, size}; - } -}; +string f$prepare_search_query(const string &query) noexcept; -// str_concat functions implement efficient string-typed `.` (concatenation) operator implementation; -// apart from being machine-code size efficient (a function call is more compact), they're also -// usually faster as runtime is compiled with -O3 which is almost never the case for translated C++ code -// (it's either -O2 or -Os most of the time) -// -// we choose to have 4 functions (up to 5 arguments) because of the frequency distribution: -// 37619: 2 args -// 20616: 3 args -// 4534: 5 args -// 3791: 4 args -// 935: 7 args -// 565: 6 args -// 350: 9 args -// Both 6 and 7 argument combination already look infrequent enough to not bother -string str_concat(const string &s1, const string &s2); -string str_concat(str_concat_arg s1, str_concat_arg s2); -string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3); -string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3, str_concat_arg s4); -string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3, str_concat_arg s4, str_concat_arg s5); +Optional> f$str_getcsv(const string &s, const string &delimiter = StringLibConstants::get().COMMA_STR, + const string &enclosure = StringLibConstants::get().QUOTE_STR, + const string &escape = StringLibConstants::get().BACKSLASH_STR) noexcept; diff --git a/runtime/tcp.cpp b/runtime/tcp.cpp index 0176df3650..f3227b4c28 100644 --- a/runtime/tcp.cpp +++ 
b/runtime/tcp.cpp @@ -7,12 +7,12 @@ #include #include +#include "runtime-common/stdlib/string/string-functions.h" #include "runtime/allocator.h" #include "runtime/critical_section.h" #include "runtime/datetime/datetime_functions.h" #include "runtime/net_events.h" #include "runtime/streams.h" -#include "runtime/string_functions.h" #include "runtime/tcp.h" namespace { diff --git a/runtime/udp.cpp b/runtime/udp.cpp index ede9928675..e4ffe6023c 100644 --- a/runtime/udp.cpp +++ b/runtime/udp.cpp @@ -9,13 +9,12 @@ #include #include "common/resolver.h" - +#include "runtime-common/stdlib/string/string-functions.h" #include "runtime/allocator.h" #include "runtime/critical_section.h" #include "runtime/datetime/datetime_functions.h" #include "runtime/net_events.h" #include "runtime/streams.h" -#include "runtime/string_functions.h" #include "runtime/url.h" int DEFAULT_SOCKET_TIMEOUT = 60; diff --git a/runtime/url.cpp b/runtime/url.cpp index 2ed7e6fc5d..62564c1ba3 100644 --- a/runtime/url.cpp +++ b/runtime/url.cpp @@ -5,7 +5,8 @@ #include "runtime/url.h" #include "common/macos-ports.h" - +#include "runtime-common/stdlib/string/string-context.h" +#include "runtime-common/stdlib/string/string-functions.h" #include "runtime/array_functions.h" #include "runtime/regexp.h" @@ -490,8 +491,8 @@ string f$rawurlencode(const string &s) { kphp_runtime_context.static_SB.append_char(s[i]); } else { kphp_runtime_context.static_SB.append_char('%'); - kphp_runtime_context.static_SB.append_char(uhex_digits[(s[i] >> 4) & 15]); - kphp_runtime_context.static_SB.append_char(uhex_digits[s[i] & 15]); + kphp_runtime_context.static_SB.append_char(StringLibConstants::get().uhex_digits[(s[i] >> 4) & 15]); + kphp_runtime_context.static_SB.append_char(StringLibConstants::get().uhex_digits[s[i] & 15]); } } return kphp_runtime_context.static_SB.str(); @@ -528,8 +529,8 @@ string f$urlencode(const string &s) { kphp_runtime_context.static_SB.append_char('+'); } else { 
kphp_runtime_context.static_SB.append_char('%'); - kphp_runtime_context.static_SB.append_char(uhex_digits[(s[i] >> 4) & 15]); - kphp_runtime_context.static_SB.append_char(uhex_digits[s[i] & 15]); + kphp_runtime_context.static_SB.append_char(StringLibConstants::get().uhex_digits[(s[i] >> 4) & 15]); + kphp_runtime_context.static_SB.append_char(StringLibConstants::get().uhex_digits[s[i] & 15]); } } return kphp_runtime_context.static_SB.str(); diff --git a/runtime/zlib.cpp b/runtime/zlib.cpp index 3d9d48c234..b58bef515a 100644 --- a/runtime/zlib.cpp +++ b/runtime/zlib.cpp @@ -4,19 +4,19 @@ #include "runtime/zlib.h" +#include "runtime-common/stdlib/string/string-context.h" #include "runtime/context/runtime-context.h" #include "runtime/critical_section.h" -#include "runtime/string_functions.h" namespace { voidpf zlib_static_alloc(voidpf opaque, uInt items, uInt size) { int *buf_pos = (int *)opaque; - php_assert (items != 0 && (PHP_BUF_LEN - *buf_pos) / items >= size); + php_assert (items != 0 && (StringLibContext::STATIC_BUFFER_LENGTH - *buf_pos) / items >= size); int pos = *buf_pos; *buf_pos += items * size; - php_assert (*buf_pos <= PHP_BUF_LEN); - return php_buf + pos; + php_assert (*buf_pos <= StringLibContext::STATIC_BUFFER_LENGTH); + return StringLibContext::get().static_buf.data() + pos; } void zlib_static_free(voidpf opaque __attribute__((unused)), voidpf address __attribute__((unused))) {} @@ -248,8 +248,8 @@ static string::size_type zlib_decode_raw(vk::string_view s, int encoding) { strm.opaque = Z_NULL; strm.avail_in = s.size(); strm.next_in = reinterpret_cast(const_cast(s.data())); - strm.avail_out = PHP_BUF_LEN; - strm.next_out = reinterpret_cast(php_buf); + strm.avail_out = StringLibContext::STATIC_BUFFER_LENGTH; + strm.next_out = reinterpret_cast(StringLibContext::get().static_buf.data()); int ret = inflateInit2(&strm, encoding); if (ret != Z_OK) { @@ -270,13 +270,13 @@ static string::size_type zlib_decode_raw(vk::string_view s, int encoding) { return -1; 
} - int res_len = PHP_BUF_LEN - strm.avail_out; + int res_len = StringLibContext::STATIC_BUFFER_LENGTH - strm.avail_out; if (strm.avail_out == 0 && ret != Z_STREAM_END) { inflateEnd(&strm); dl::leave_critical_section(); - php_warning("size of unpacked data is greater then %d. Can't decode.", PHP_BUF_LEN); + php_warning("size of unpacked data is greater then %d. Can't decode.", StringLibContext::STATIC_BUFFER_LENGTH); return -1; } @@ -292,7 +292,7 @@ const char *gzuncompress_raw(vk::string_view s, string::size_type *result_len) { return ""; } *result_len = len; - return php_buf; + return StringLibContext::get().static_buf.data(); } string zlib_decode(const string &s, int encoding) { @@ -300,7 +300,7 @@ string zlib_decode(const string &s, int encoding) { if (len == -1u) { return {}; } - return {php_buf, static_cast(len)}; + return {StringLibContext::get().static_buf.data(), static_cast(len)}; } string f$gzdecode(const string &s) { diff --git a/runtime/zstd.cpp b/runtime/zstd.cpp index 683265411d..e9ca4ee673 100644 --- a/runtime/zstd.cpp +++ b/runtime/zstd.cpp @@ -7,14 +7,13 @@ #include #include "common/smart_ptrs/unique_ptr_with_delete_function.h" - -#include "runtime/string_functions.h" +#include "runtime-common/stdlib/string/string-context.h" #include "runtime/allocator.h" #include "runtime/zstd.h" namespace { -static_assert(2 * ZSTD_BLOCKSIZE_MAX < PHP_BUF_LEN, "double block size is expected to be less then buffer size"); +static_assert(2 * ZSTD_BLOCKSIZE_MAX < StringLibContext::STATIC_BUFFER_LENGTH, "double block size is expected to be less then buffer size"); ZSTD_customMem make_custom_alloc() noexcept { return ZSTD_customMem{ @@ -49,8 +48,8 @@ Optional zstd_compress_impl(const string &data, int64_t level = DEFAULT_ return false; } - php_assert(ZSTD_CStreamOutSize() <= PHP_BUF_LEN); - ZSTD_outBuffer out{php_buf, PHP_BUF_LEN, 0}; + php_assert(ZSTD_CStreamOutSize() <= StringLibContext::STATIC_BUFFER_LENGTH); + ZSTD_outBuffer 
out{StringLibContext::get().static_buf.data(), StringLibContext::STATIC_BUFFER_LENGTH, 0}; ZSTD_inBuffer in{data.c_str(), data.size(), 0}; string encoded_string; @@ -104,9 +103,9 @@ Optional zstd_uncompress_impl(const string &data, const string &dict = s return false; } - php_assert(ZSTD_DStreamOutSize() <= PHP_BUF_LEN); + php_assert(ZSTD_DStreamOutSize() <= StringLibContext::STATIC_BUFFER_LENGTH); ZSTD_inBuffer in{data.c_str(), data.size(), 0}; - ZSTD_outBuffer out{php_buf, PHP_BUF_LEN, 0}; + ZSTD_outBuffer out{StringLibContext::get().static_buf.data(), StringLibContext::STATIC_BUFFER_LENGTH, 0}; string decoded_string; while (in.pos < in.size) { diff --git a/tests/cpp/runtime/_runtime-tests-env.cpp b/tests/cpp/runtime/_runtime-tests-env.cpp index 9fc7c59185..5ad513e9b9 100644 --- a/tests/cpp/runtime/_runtime-tests-env.cpp +++ b/tests/cpp/runtime/_runtime-tests-env.cpp @@ -32,7 +32,7 @@ class RuntimeTestsEnvironment final : public testing::Environment { global_init_script_allocator(); init_runtime_environment(null_query_data{}, PhpScriptMutableGlobals::current().get_superglobals(), script_memory, script_memory_size); - KphpCoreContext::current().php_disable_warnings = true; + RuntimeContext::get().php_disable_warnings = true; php_warning_level = 0; } diff --git a/tests/cpp/runtime/string-test.cpp b/tests/cpp/runtime/string-test.cpp index ae2177fabc..a205e5c9f6 100644 --- a/tests/cpp/runtime/string-test.cpp +++ b/tests/cpp/runtime/string-test.cpp @@ -1,7 +1,7 @@ #include #include "runtime-common/core/runtime-core.h" -#include "runtime/string_functions.h" +#include "runtime-common/stdlib/string/string-functions.h" TEST(string_test, test_empty) { string empty_str; @@ -158,4 +158,4 @@ TEST(string_test, test_hex_to_int) { ASSERT_EQ(hex_to_int('D'), 13); ASSERT_EQ(hex_to_int('E'), 14); ASSERT_EQ(hex_to_int('F'), 15); -} \ No newline at end of file +} diff --git a/tests/cpp/runtime/zstd-test.cpp b/tests/cpp/runtime/zstd-test.cpp index 30982d9c4c..e31ea84843 100644 --- 
a/tests/cpp/runtime/zstd-test.cpp +++ b/tests/cpp/runtime/zstd-test.cpp @@ -2,12 +2,12 @@ #include -#include "runtime/string_functions.h" +#include "runtime-common/stdlib/string/string-context.h" TEST(zstd_test, test_bounds) { - ASSERT_LE(ZSTD_CStreamOutSize(), PHP_BUF_LEN); - ASSERT_LE(ZSTD_CStreamInSize(), PHP_BUF_LEN); + ASSERT_LE(ZSTD_CStreamOutSize(), StringLibContext::STATIC_BUFFER_LENGTH); + ASSERT_LE(ZSTD_CStreamInSize(), StringLibContext::STATIC_BUFFER_LENGTH); - ASSERT_LE(ZSTD_DStreamOutSize(), PHP_BUF_LEN); - ASSERT_LE(ZSTD_DStreamInSize(), PHP_BUF_LEN); + ASSERT_LE(ZSTD_DStreamOutSize(), StringLibContext::STATIC_BUFFER_LENGTH); + ASSERT_LE(ZSTD_DStreamInSize(), StringLibContext::STATIC_BUFFER_LENGTH); }