From e06286d721f7538e379fb9853f36a9dfa6d7acb8 Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Sun, 18 Dec 2022 21:23:04 +0300 Subject: [PATCH 01/25] kphp-yaml v0.1: parse, emit, parse_file, emit_file, c++ tests --- builtin-functions/_functions.txt | 5 + runtime/runtime.cmake | 1 + runtime/yaml.cpp | 169 +++++++++++++++++++++ runtime/yaml.h | 23 +++ tests/cpp/runtime/runtime-tests.cmake | 1 + tests/cpp/runtime/yaml-test.cpp | 211 ++++++++++++++++++++++++++ 6 files changed, 410 insertions(+) create mode 100644 runtime/yaml.cpp create mode 100644 runtime/yaml.h create mode 100644 tests/cpp/runtime/yaml-test.cpp diff --git a/builtin-functions/_functions.txt b/builtin-functions/_functions.txt index f4f9cd13fe..c17c686e01 100644 --- a/builtin-functions/_functions.txt +++ b/builtin-functions/_functions.txt @@ -1611,3 +1611,8 @@ class DateTimeImmutable implements DateTimeInterface { public function getOffset(): int; public function getTimestamp(): int; } + +function yaml_emit_file ($filename ::: string, $data ::: mixed) ::: bool; +function yaml_emit ($data ::: mixed) ::: string; +function yaml_parse_file ($filename ::: string, $pos ::: int = 0) ::: mixed; +function yaml_parse ($data ::: string, $pos ::: int = 0) ::: mixed; diff --git a/runtime/runtime.cmake b/runtime/runtime.cmake index bf6df3dcde..cfa9a155c3 100644 --- a/runtime/runtime.cmake +++ b/runtime/runtime.cmake @@ -113,6 +113,7 @@ prepend(KPHP_RUNTIME_SOURCES ${BASE_DIR}/runtime/ vkext.cpp vkext_stats.cpp ffi.cpp + yaml.cpp zlib.cpp zstd.cpp) diff --git a/runtime/yaml.cpp b/runtime/yaml.cpp new file mode 100644 index 0000000000..60e37cc49c --- /dev/null +++ b/runtime/yaml.cpp @@ -0,0 +1,169 @@ +#include + +#include "runtime/optional.h" +#include "runtime/streams.h" +#include "runtime/critical_section.h" + +#include "runtime/yaml.h" + +// TODO: add key-arrays support +void yaml_node_to_mixed(const YAML::Node &node, mixed &data) { + data.clear(); + if (node.IsNull()) { + php_warning("Yaml node is null. Skipping it"); + return; + } + if (node.IsScalar()) { + string string_data = string(node.as().c_str()); + if (string_data.is_int()) { + data = string_data.to_int(); + return; + } + dl::enter_critical_section(); + double *float_data = new double; + if (string_data.try_to_float(float_data)) { + data = *float_data; + } + else data = string_data; + delete float_data; + dl::leave_critical_section(); + return; + } + if (node.IsSequence()) { + for (auto it = node.begin(); it != node.end(); ++it) { + mixed data_piece; + yaml_node_to_mixed(*it, data_piece); + data.push_back(data_piece); + } + } + else if (node.IsMap()) { + for (auto it : node) { + mixed data_piece; + yaml_node_to_mixed(it.second, data_piece); + data[string(it.first.as().c_str())] = data_piece; + } + } +} + +// TODO: rewrite using switch +// TODO: add key-arrays support +void mixed_to_yaml_node(const mixed &data, YAML::Node &node) { + if (!data.is_array()) { + if (data.is_null()) { + php_warning("Cannot convert (mixed)null into yaml node"); + return; + } + if (data.is_string()) { + node = std::string(data.as_string().c_str()); + return; + } + if (data.is_int()) { + node = data.as_int(); + return; + } + if (data.is_float()) { + node = data.as_double(); + return; + } + if (data.is_bool()) { + if (!data.as_bool()) { + node = false; + } + else { + node = true; + } + return; + } + } + array data_array = data.as_array(); + if (data_array.is_pseudo_vector()) { + for (auto it = data_array.begin(); it != data_array.end(); ++it) { + mixed data_piece = it.get_value(); + if (data_piece.is_array()) { + YAML::Node node_piece; + mixed_to_yaml_node(data_piece, node_piece); + node.push_back(node_piece); + } + else if (data_piece.is_string()) { + node.push_back(std::string(data_piece.as_string().c_str())); + } + else if (data_piece.is_int()) { + node.push_back(data_piece.as_int()); + } + else if (data_piece.is_float()) { + node.push_back(data_piece.as_double()); + } + else if (data_piece.is_bool()) { + node.push_back(data_piece.as_bool()); + } + else if (data_piece.is_null()) { + php_warning("Data piece is null. Skipping it"); + } + else php_warning("Unknown data type. Skipping it"); // maybe this is redundant? + } + } else { + for (auto it = data_array.begin(); it != data_array.end(); ++it) { + mixed data_piece = it.get_value(); + YAML::Node node_piece; + mixed_to_yaml_node(data_piece, node_piece); + mixed data_key = it.get_key(); + if (data_key.is_null() || data_key.is_array()) { + php_warning("Key is null or an array. Cannot handle it. Pushing data as in a vector"); + node.push_back(node_piece); + } + else if (data_key.is_string()) { + node[std::string(data_key.as_string().c_str())] = node_piece; + } + else if (data_key.is_int()) { // float and bool are cast to int + node[data_key.as_int()] = node_piece; + } + else { // maybe this is redundant? + php_warning("Unknown key type. Pushing data as in a vector"); + node.push_back(node_piece); + } + } + } +} + +bool f$yaml_emit_file(const string &filename, const mixed &data) { + if (filename.empty()) { + php_warning("Filename cannot be empty"); + return false; + } + string emitted_data = f$yaml_emit(data); + Optional size = f$file_put_contents(filename, emitted_data); + if (size.is_false()) { + php_warning("Error while writing to file \"%s\"", filename.c_str()); + return false; + } + return true; +} + +string f$yaml_emit(const mixed &data) { + YAML::Node node; + mixed_to_yaml_node(data, node); + return string(YAML::Dump(node).c_str()); +} + +mixed f$yaml_parse_file(const string &filename, int pos) { + if (filename.empty()) { + php_warning("Filename cannot be empty"); + return {}; + } + Optional data = f$file_get_contents(filename); + if (data.is_false()) { + php_warning("Error while reading file \"%s\"", filename.c_str()); + return {}; + } + return f$yaml_parse(data.ref(), pos); +} + +mixed f$yaml_parse(const string &data, int pos) { + if (pos < 0) { + php_warning("Argument \"pos\" = %d. Values other than 0 are not supported yet. Setting to default (pos = 0)", pos); + } + YAML::Node node = YAML::Load(data.c_str()); + mixed parsed_data; + yaml_node_to_mixed(node, parsed_data); + return parsed_data; +} diff --git a/runtime/yaml.h b/runtime/yaml.h new file mode 100644 index 0000000000..0a694ef596 --- /dev/null +++ b/runtime/yaml.h @@ -0,0 +1,23 @@ +#pragma once + +#include "runtime/kphp_core.h" + +/* + * emit mixed into a yaml file + */ +bool f$yaml_emit_file(const string &filename, const mixed &data); + +/* + * emit mixed into a yaml string + */ +string f$yaml_emit(const mixed &data); + +/* + * parse yaml file into mixed + */ +mixed f$yaml_parse_file(const string &filename, int pos = 0); + +/* + * parse yaml string into mixed + */ +mixed f$yaml_parse(const string &data, int pos = 0); \ No newline at end of file diff --git a/tests/cpp/runtime/runtime-tests.cmake b/tests/cpp/runtime/runtime-tests.cmake index 32f74e5d7a..621dbb33a1 100644 --- a/tests/cpp/runtime/runtime-tests.cmake +++ b/tests/cpp/runtime/runtime-tests.cmake @@ -19,6 +19,7 @@ prepend(RUNTIME_TESTS_SOURCES ${BASE_DIR}/tests/cpp/runtime/ memory_resource/unsynchronized_pool_resource-test.cpp string-list-test.cpp string-test.cpp + yaml-test.cpp zstd-test.cpp) allow_deprecated_declarations_for_apple(${BASE_DIR}/tests/cpp/runtime/inter-process-mutex-test.cpp) diff --git a/tests/cpp/runtime/yaml-test.cpp b/tests/cpp/runtime/yaml-test.cpp new file mode 100644 index 0000000000..60fbc83693 --- /dev/null +++ b/tests/cpp/runtime/yaml-test.cpp @@ -0,0 +1,211 @@ +#include + +#include "runtime/files.h" +#include "runtime/yaml.h" + +TEST(yaml_test, test_yaml_string) { + mixed example = string("string"); + mixed result = f$yaml_parse(f$yaml_emit(example)); + ASSERT_TRUE(result.is_string()); + ASSERT_TRUE(example.as_string() == result.as_string()); +} + +TEST(yaml_test, test_yaml_int) { + mixed example = 13; + mixed result = f$yaml_parse(f$yaml_emit(example)); + ASSERT_TRUE(result.is_int()); + ASSERT_EQ(example.as_int(), result.as_int()); +} + +TEST(yaml_test, test_yaml_float) { + mixed example = 3.1416; + mixed result = f$yaml_parse(f$yaml_emit(example)); + ASSERT_TRUE(result.is_float()); + ASSERT_DOUBLE_EQ(example.as_double(), result.as_double()); +} + +TEST(yaml_test, test_yaml_vector) { + mixed example; + example.push_back(string("string")); + example.push_back(13); + example.push_back(3.1416); + mixed result = f$yaml_parse(f$yaml_emit(example)); + ASSERT_TRUE(result.is_array()); + ASSERT_TRUE(result.as_array().is_pseudo_vector()); + ASSERT_TRUE(result[0].is_string()); + ASSERT_TRUE(example[0].as_string() == result[0].as_string()); + ASSERT_TRUE(result[1].is_int()); + ASSERT_EQ(example[1].as_int(), result[1].as_int()); + ASSERT_TRUE(result[2].is_float()); + ASSERT_DOUBLE_EQ(example[2].as_double(), result[2].as_double()); +} + +TEST(yaml_test, test_yaml_vector_recursive) { + mixed example, copy; + example.push_back(string("string")); + example.push_back(13); + example.push_back(3.1416); + for (auto it = example.begin(); it != example.end(); ++it) { + copy.push_back(it.get_value()); + } + example.push_back(copy); + mixed result = f$yaml_parse(f$yaml_emit(example)); + ASSERT_TRUE(result[3].is_array()); + ASSERT_TRUE(result[3].as_array().is_pseudo_vector()); + ASSERT_TRUE(result[3][0].is_string()); + ASSERT_TRUE(example[3][0].as_string() == result[3][0].as_string()); + ASSERT_TRUE(result[3][1].is_int()); + ASSERT_EQ(example[3][1].as_int(), result[3][1].as_int()); + ASSERT_TRUE(result[3][2].is_float()); + ASSERT_DOUBLE_EQ(example[3][2].as_double(), result[3][2].as_double()); +} + +TEST(yaml_test, test_yaml_map) { + mixed example; + example[string("first")] = string("string"); + example[string("second")] = 13; + example[string("third")] = 3.1416; + mixed result = f$yaml_parse(f$yaml_emit(example)); + ASSERT_TRUE(result.is_array()); + ASSERT_FALSE(result.as_array().is_pseudo_vector()); + ASSERT_TRUE(result[string("first")].is_string()); + ASSERT_TRUE(example[string("first")].as_string() == result[string("first")].as_string()); + ASSERT_TRUE(result[string("second")].is_int()); + ASSERT_EQ(example[string("second")].as_int(), result[string("second")].as_int()); + ASSERT_TRUE(result[string("third")].is_float()); + ASSERT_DOUBLE_EQ(example[string("third")].as_double(), result[string("third")].as_double()); +} + +TEST(yaml_test, test_yaml_map_recursive) { + mixed example, copy; + example[string("first")] = string("string"); + example[string("second")] = 13; + example[string("third")] = 3.1416; + for (auto it = example.begin(); it != example.end(); ++it) { + copy[it.get_key()] = it.get_value(); + } + example[string("self")] = copy; + mixed result = f$yaml_parse(f$yaml_emit(example)); + ASSERT_TRUE(result[string("self")].is_array()); + ASSERT_FALSE(result[string("self")].as_array().is_pseudo_vector()); + ASSERT_TRUE(result[string("self")][string("first")].is_string()); + ASSERT_TRUE(example[string("self")][string("first")].as_string() == result[string("self")][string("first")].as_string()); + ASSERT_TRUE(result[string("self")][string("second")].is_int()); + ASSERT_EQ(example[string("self")][string("second")].as_int(), result[string("self")][string("second")].as_int()); + ASSERT_TRUE(result[string("self")][string("third")].is_float()); + ASSERT_DOUBLE_EQ(example[string("self")][string("third")].as_double(), result[string("self")][string("third")].as_double()); +} + +TEST(yaml_test, test_yaml_string_file) { + mixed example = string("string"); + string filename("test_yaml_string"); + ASSERT_TRUE(f$yaml_emit_file(filename, example)); + mixed result = f$yaml_parse_file(filename); + ASSERT_TRUE(result.is_string()); + ASSERT_TRUE(example.as_string() == result.as_string()); + ASSERT_TRUE(f$unlink(filename)); +} + +TEST(yaml_test, test_yaml_int_file) { + mixed example = 13; + string filename("test_yaml_int"); + ASSERT_TRUE(f$yaml_emit_file(filename, example)); + mixed result = f$yaml_parse_file(filename); + ASSERT_TRUE(result.is_int()); + ASSERT_EQ(example.as_int(), result.as_int()); + ASSERT_TRUE(f$unlink(filename)); +} + +TEST(yaml_test, test_yaml_float_file) { + mixed example = 3.1416; + string filename("test_yaml_float"); + ASSERT_TRUE(f$yaml_emit_file(filename, example)); + mixed result = f$yaml_parse_file(filename); + ASSERT_TRUE(result.is_float()); + ASSERT_DOUBLE_EQ(example.as_double(), result.as_double()); + ASSERT_TRUE(f$unlink(filename)); +} + +TEST(yaml_test, test_yaml_vector_file) { + mixed example; + example.push_back(string("string")); + example.push_back(13); + example.push_back(3.1416); + string filename("test_yaml_vector"); + ASSERT_TRUE(f$yaml_emit_file(filename, example)); + mixed result = f$yaml_parse_file(filename); + ASSERT_TRUE(result.is_array()); + ASSERT_TRUE(result.as_array().is_pseudo_vector()); + ASSERT_TRUE(result[0].is_string()); + ASSERT_TRUE(example[0].as_string() == result[0].as_string()); + ASSERT_TRUE(result[1].is_int()); + ASSERT_EQ(example[1].as_int(), result[1].as_int()); + ASSERT_TRUE(result[2].is_float()); + ASSERT_DOUBLE_EQ(example[2].as_double(), result[2].as_double()); + ASSERT_TRUE(f$unlink(filename)); +} + +TEST(yaml_test, test_yaml_vector_recursive_file) { + mixed example, copy; + example.push_back(string("string")); + example.push_back(13); + example.push_back(3.1416); + for (auto it = example.begin(); it != example.end(); ++it) { + copy.push_back(it.get_value()); + } + example.push_back(copy); + string filename("test_yaml_vector_recursive"); + ASSERT_TRUE(f$yaml_emit_file(filename, example)); + mixed result = f$yaml_parse_file(filename); + ASSERT_TRUE(result[3].is_array()); + ASSERT_TRUE(result[3].as_array().is_pseudo_vector()); + ASSERT_TRUE(result[3][0].is_string()); + ASSERT_TRUE(example[3][0].as_string() == result[3][0].as_string()); + ASSERT_TRUE(result[3][1].is_int()); + ASSERT_EQ(example[3][1].as_int(), result[3][1].as_int()); + ASSERT_TRUE(result[3][2].is_float()); + ASSERT_DOUBLE_EQ(example[3][2].as_double(), result[3][2].as_double()); + ASSERT_TRUE(f$unlink(filename)); +} + +TEST(yaml_test, test_yaml_map_file) { + mixed example; + example[string("first")] = string("string"); + example[string("second")] = 13; + example[string("third")] = 3.1416; + string filename("test_yaml_map"); + ASSERT_TRUE(f$yaml_emit_file(filename, example)); + mixed result = f$yaml_parse_file(filename); + ASSERT_TRUE(result.is_array()); + ASSERT_FALSE(result.as_array().is_pseudo_vector()); + ASSERT_TRUE(result[string("first")].is_string()); + ASSERT_TRUE(example[string("first")].as_string() == result[string("first")].as_string()); + ASSERT_TRUE(result[string("second")].is_int()); + ASSERT_EQ(example[string("second")].as_int(), result[string("second")].as_int()); + ASSERT_TRUE(result[string("third")].is_float()); + ASSERT_DOUBLE_EQ(example[string("third")].as_double(), result[string("third")].as_double()); + ASSERT_TRUE(f$unlink(filename)); +} + +TEST(yaml_test, test_yaml_map_recursive_file) { + mixed example, copy; + example[string("first")] = string("string"); + example[string("second")] = 13; + example[string("third")] = 3.1416; + for (auto it = example.begin(); it != example.end(); ++it) { + copy[it.get_key()] = it.get_value(); + } + example[string("self")] = copy; + string filename("test_yaml_map_recursive"); + ASSERT_TRUE(f$yaml_emit_file(filename, example)); + mixed result = f$yaml_parse_file(filename); + ASSERT_TRUE(result[string("self")].is_array()); + ASSERT_FALSE(result[string("self")].as_array().is_pseudo_vector()); + ASSERT_TRUE(result[string("self")][string("first")].is_string()); + ASSERT_TRUE(example[string("self")][string("first")].as_string() == result[string("self")][string("first")].as_string()); + ASSERT_TRUE(result[string("self")][string("second")].is_int()); + ASSERT_EQ(example[string("self")][string("second")].as_int(), result[string("self")][string("second")].as_int()); + ASSERT_TRUE(result[string("self")][string("third")].is_float()); + ASSERT_DOUBLE_EQ(example[string("self")][string("third")].as_double(), result[string("self")][string("third")].as_double()); + ASSERT_TRUE(f$unlink(filename)); +} \ No newline at end of file From 6cd1ccfdc933487dedf2738f146c455758ee792d Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Fri, 23 Dec 2022 21:04:15 +0300 Subject: [PATCH 02/25] khhp-yaml: codestyle fixes & memory optimizations --- runtime/yaml.cpp | 114 +++++++++++++++-------------------------------- 1 file changed, 37 insertions(+), 77 deletions(-) diff --git a/runtime/yaml.cpp b/runtime/yaml.cpp index 60e37cc49c..1469128f8b 100644 --- a/runtime/yaml.cpp +++ b/runtime/yaml.cpp @@ -6,38 +6,33 @@ #include "runtime/yaml.h" -// TODO: add key-arrays support void yaml_node_to_mixed(const YAML::Node &node, mixed &data) { data.clear(); if (node.IsNull()) { php_warning("Yaml node is null. Skipping it"); - return; - } - if (node.IsScalar()) { - string string_data = string(node.as().c_str()); + } else if (node.IsScalar()) { + const string string_data = string(node.as().c_str()); if (string_data.is_int()) { data = string_data.to_int(); - return; - } - dl::enter_critical_section(); - double *float_data = new double; - if (string_data.try_to_float(float_data)) { - data = *float_data; + } else { + dl::enter_critical_section(); + auto *float_data = new double; + if (string_data.try_to_float(float_data)) { + data = *float_data; + } else { + data = string_data; + } + delete float_data; + dl::leave_critical_section(); } - else data = string_data; - delete float_data; - dl::leave_critical_section(); - return; - } - if (node.IsSequence()) { + } else if (node.IsSequence()) { for (auto it = node.begin(); it != node.end(); ++it) { mixed data_piece; yaml_node_to_mixed(*it, data_piece); data.push_back(data_piece); } - } - else if (node.IsMap()) { - for (auto it : node) { + } else if (node.IsMap()) { + for (const auto &it : node) { mixed data_piece; yaml_node_to_mixed(it.second, data_piece); data[string(it.first.as().c_str())] = data_piece; @@ -45,79 +40,43 @@ void yaml_node_to_mixed(const YAML::Node &node, mixed &data) { } } -// TODO: rewrite using switch -// TODO: add key-arrays support void mixed_to_yaml_node(const mixed &data, YAML::Node &node) { if (!data.is_array()) { if (data.is_null()) { - php_warning("Cannot convert (mixed)null into yaml node"); - return; - } - if (data.is_string()) { + php_warning("Cannot convert null into yaml node"); + } else if (data.is_string()) { node = std::string(data.as_string().c_str()); - return; - } - if (data.is_int()) { + } else if (data.is_int()) { node = data.as_int(); - return; - } - if (data.is_float()) { + } else if (data.is_float()) { node = data.as_double(); - return; - } - if (data.is_bool()) { - if (!data.as_bool()) { - node = false; - } - else { - node = true; - } - return; + } else if (data.is_bool()) { + node = data.as_bool(); } + return; } - array data_array = data.as_array(); + const array &data_array = data.as_array(); if (data_array.is_pseudo_vector()) { - for (auto it = data_array.begin(); it != data_array.end(); ++it) { - mixed data_piece = it.get_value(); - if (data_piece.is_array()) { - YAML::Node node_piece; - mixed_to_yaml_node(data_piece, node_piece); - node.push_back(node_piece); - } - else if (data_piece.is_string()) { - node.push_back(std::string(data_piece.as_string().c_str())); - } - else if (data_piece.is_int()) { - node.push_back(data_piece.as_int()); - } - else if (data_piece.is_float()) { - node.push_back(data_piece.as_double()); - } - else if (data_piece.is_bool()) { - node.push_back(data_piece.as_bool()); - } - else if (data_piece.is_null()) { - php_warning("Data piece is null. Skipping it"); - } - else php_warning("Unknown data type. Skipping it"); // maybe this is redundant? + for (const auto &it : data_array) { + const mixed data_piece = it.get_value(); + YAML::Node node_piece; + mixed_to_yaml_node(data_piece, node_piece); + node.push_back(node_piece); } } else { - for (auto it = data_array.begin(); it != data_array.end(); ++it) { - mixed data_piece = it.get_value(); + for (const auto &it : data_array) { + const mixed data_key = it.get_key(); + const mixed data_piece = it.get_value(); YAML::Node node_piece; mixed_to_yaml_node(data_piece, node_piece); - mixed data_key = it.get_key(); if (data_key.is_null() || data_key.is_array()) { - php_warning("Key is null or an array. Cannot handle it. Pushing data as in a vector"); + php_warning("Null and array keys are prohibited. Pushing data as in a vector"); node.push_back(node_piece); - } - else if (data_key.is_string()) { + } else if (data_key.is_string()) { node[std::string(data_key.as_string().c_str())] = node_piece; - } - else if (data_key.is_int()) { // float and bool are cast to int + } else if (data_key.is_int()) { // float and bool keys are cast to int node[data_key.as_int()] = node_piece; - } - else { // maybe this is redundant? + } else { php_warning("Unknown key type. Pushing data as in a vector"); node.push_back(node_piece); } @@ -159,7 +118,7 @@ mixed f$yaml_parse_file(const string &filename, int pos) { } mixed f$yaml_parse(const string &data, int pos) { - if (pos < 0) { + if (pos != 0) { php_warning("Argument \"pos\" = %d. Values other than 0 are not supported yet. Setting to default (pos = 0)", pos); } YAML::Node node = YAML::Load(data.c_str()); @@ -167,3 +126,4 @@ mixed f$yaml_parse(const string &data, int pos) { yaml_node_to_mixed(node, parsed_data); return parsed_data; } + From da3ee16a863d2c83719c0f3f95431c7afd9a9f4b Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Sun, 26 Feb 2023 11:27:08 +0300 Subject: [PATCH 03/25] yaml: add empty string tests and number-as-string tests --- tests/cpp/runtime/yaml-test.cpp | 51 +++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/tests/cpp/runtime/yaml-test.cpp b/tests/cpp/runtime/yaml-test.cpp index 60fbc83693..9ca273ef34 100644 --- a/tests/cpp/runtime/yaml-test.cpp +++ b/tests/cpp/runtime/yaml-test.cpp @@ -10,6 +10,13 @@ TEST(yaml_test, test_yaml_string) { ASSERT_TRUE(example.as_string() == result.as_string()); } +TEST(yaml_test, test_yaml_empty_string) { + mixed example = string(""); + mixed result = f$yaml_parse(f$yaml_emit(example)); + ASSERT_TRUE(result.is_string()); + ASSERT_TRUE(example.as_string() == result.as_string()); +} + TEST(yaml_test, test_yaml_int) { mixed example = 13; mixed result = f$yaml_parse(f$yaml_emit(example)); @@ -24,6 +31,20 @@ TEST(yaml_test, test_yaml_float) { ASSERT_DOUBLE_EQ(example.as_double(), result.as_double()); } +TEST(yaml_test, test_yaml_int_as_string) { + mixed example = string("13"); + mixed result = f$yaml_parse(f$yaml_emit(example)); + ASSERT_TRUE(result.is_string()); + ASSERT_EQ(example.as_string(), result.as_string()); +} + +TEST(yaml_test, test_yaml_float_as_string) { + mixed example = string("3.1416"); + mixed result = f$yaml_parse(f$yaml_emit(example)); + ASSERT_TRUE(result.is_string()); + ASSERT_EQ(example.as_string(), result.as_string()); +} + TEST(yaml_test, test_yaml_vector) { mixed example; example.push_back(string("string")); @@ -106,6 +127,16 @@ TEST(yaml_test, test_yaml_string_file) { ASSERT_TRUE(f$unlink(filename)); } +TEST(yaml_test, test_yaml_empty_string_file) { + mixed example = string(""); + string filename("test_yaml_empty_string"); + ASSERT_TRUE(f$yaml_emit_file(filename, example)); + mixed result = f$yaml_parse_file(filename); + ASSERT_TRUE(result.is_string()); + ASSERT_TRUE(example.as_string() == result.as_string()); + ASSERT_TRUE(f$unlink(filename)); +} + TEST(yaml_test, test_yaml_int_file) { mixed example = 13; string filename("test_yaml_int"); @@ -126,6 +157,26 @@ TEST(yaml_test, test_yaml_float_file) { ASSERT_TRUE(f$unlink(filename)); } +TEST(yaml_test, test_yaml_int_as_string_file) { + mixed example = string("13"); + string filename("test_yaml_int_as_string"); + ASSERT_TRUE(f$yaml_emit_file(filename, example)); + mixed result = f$yaml_parse_file(filename); + ASSERT_TRUE(result.is_string()); + ASSERT_EQ(example.as_string(), result.as_string()); + ASSERT_TRUE(f$unlink(filename)); +} + +TEST(yaml_test, test_yaml_float_as_string_file) { + mixed example = string("3.1416"); + string filename("test_yaml_float_as_string"); + ASSERT_TRUE(f$yaml_emit_file(filename, example)); + mixed result = f$yaml_parse_file(filename); + ASSERT_TRUE(result.is_string()); + ASSERT_EQ(example.as_string(), result.as_string()); + ASSERT_TRUE(f$unlink(filename)); +} + TEST(yaml_test, test_yaml_vector_file) { mixed example; example.push_back(string("string")); From 4e9cc2ebc7f9b906e7b2d0d751a064c4d20ae8b3 Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Sun, 26 Feb 2023 11:28:28 +0300 Subject: [PATCH 04/25] yaml: fix number-as-string parsing and rewrite mixed_to_yaml() function --- runtime/yaml.cpp | 104 +++++++++++++++++++++++++++++------------------ 1 file changed, 64 insertions(+), 40 deletions(-) diff --git a/runtime/yaml.cpp b/runtime/yaml.cpp index 1469128f8b..ca158b8475 100644 --- a/runtime/yaml.cpp +++ b/runtime/yaml.cpp @@ -3,22 +3,27 @@ #include "runtime/optional.h" #include "runtime/streams.h" #include "runtime/critical_section.h" - #include "runtime/yaml.h" -void yaml_node_to_mixed(const YAML::Node &node, mixed &data) { - data.clear(); - if (node.IsNull()) { - php_warning("Yaml node is null. Skipping it"); - } else if (node.IsScalar()) { - const string string_data = string(node.as().c_str()); +void yaml_node_to_mixed(const YAML::Node &node, mixed &data, const string &source) { + data.clear(); // sets data to NULL + if (node.IsScalar()) { + const string string_data(node.as().c_str()); if (string_data.is_int()) { - data = string_data.to_int(); + if (source[node.Mark().pos] == '"' && source[node.Mark().pos + string_data.size() + 1] == '"') { + data = string_data; + } else { + data = string_data.to_int(); + } } else { dl::enter_critical_section(); auto *float_data = new double; if (string_data.try_to_float(float_data)) { - data = *float_data; + if (source[node.Mark().pos] == '"' && source[node.Mark().pos + string_data.size() + 1] == '"') { + data = string_data; + } else { + data = *float_data; + } } else { data = string_data; } @@ -28,58 +33,78 @@ void yaml_node_to_mixed(const YAML::Node &node, mixed &data) { } else if (node.IsSequence()) { for (auto it = node.begin(); it != node.end(); ++it) { mixed data_piece; - yaml_node_to_mixed(*it, data_piece); + yaml_node_to_mixed(*it, data_piece, source); data.push_back(data_piece); } } else if (node.IsMap()) { for (const auto &it : node) { mixed data_piece; - yaml_node_to_mixed(it.second, data_piece); + yaml_node_to_mixed(it.second, data_piece, source); data[string(it.first.as().c_str())] = data_piece; } } } -void mixed_to_yaml_node(const mixed &data, YAML::Node &node) { +string print_tabs(uint8_t nesting_level) { + string tabs; + for (uint8_t i = 0; i < 2 * nesting_level; i++) { + tabs.push_back(' '); + } + return tabs; +} + +string print_key(const mixed& data_key) { + if (data_key.is_string()) { + return data_key.as_string(); + } + return string(data_key.as_int()); // array can not be a key; bool and float keys are cast to int +} + +void mixed_to_string(const mixed& data, string& string_data, uint8_t nesting_level = 0) { if (!data.is_array()) { if (data.is_null()) { - php_warning("Cannot convert null into yaml node"); + string_data.push_back('~'); } else if (data.is_string()) { - node = std::string(data.as_string().c_str()); + const string& string_data_piece = data.as_string(); + if (string_data_piece.size() < 2 + || (string_data_piece[0] != '"' && string_data_piece[string_data_piece.size() - 1] != '"')) { + string_data.push_back('"'); + string_data.append(string_data_piece); + string_data.push_back('"'); + } else { + string_data.append(string_data_piece); + } } else if (data.is_int()) { - node = data.as_int(); + string_data.append(data.as_int()); } else if (data.is_float()) { - node = data.as_double(); + string_data.append(data.as_double()); } else if (data.is_bool()) { - node = data.as_bool(); + string_data.append(data.as_bool()); } + string_data.push_back('\n'); return; } const array &data_array = data.as_array(); if (data_array.is_pseudo_vector()) { for (const auto &it : data_array) { - const mixed data_piece = it.get_value(); - YAML::Node node_piece; - mixed_to_yaml_node(data_piece, node_piece); - node.push_back(node_piece); + const mixed &data_piece = it.get_value(); + string_data.append(print_tabs(nesting_level)); + string_data.append("- "); + if (data_piece.is_array()) { + string_data.push_back('\n'); + } + mixed_to_string(data_piece, string_data, nesting_level + 1); } } else { for (const auto &it : data_array) { - const mixed data_key = it.get_key(); - const mixed data_piece = it.get_value(); - YAML::Node node_piece; - mixed_to_yaml_node(data_piece, node_piece); - if (data_key.is_null() || data_key.is_array()) { - php_warning("Null and array keys are prohibited. Pushing data as in a vector"); - node.push_back(node_piece); - } else if (data_key.is_string()) { - node[std::string(data_key.as_string().c_str())] = node_piece; - } else if (data_key.is_int()) { // float and bool keys are cast to int - node[data_key.as_int()] = node_piece; - } else { - php_warning("Unknown key type. Pushing data as in a vector"); - node.push_back(node_piece); + const mixed &data_piece = it.get_value(); + string_data.append(print_tabs(nesting_level)); + string_data.append(print_key(it.get_key())); + string_data.append(": "); + if (data_piece.is_array()) { + string_data.push_back('\n'); } + mixed_to_string(data_piece, string_data, nesting_level + 1); } } } @@ -99,9 +124,9 @@ bool f$yaml_emit_file(const string &filename, const mixed &data) { } string f$yaml_emit(const mixed &data) { - YAML::Node node; - mixed_to_yaml_node(data, node); - return string(YAML::Dump(node).c_str()); + string string_data; + mixed_to_string(data, string_data); + return string_data; } mixed f$yaml_parse_file(const string &filename, int pos) { @@ -123,7 +148,6 @@ mixed f$yaml_parse(const string &data, int pos) { } YAML::Node node = YAML::Load(data.c_str()); mixed parsed_data; - yaml_node_to_mixed(node, parsed_data); + yaml_node_to_mixed(node, parsed_data, data); return parsed_data; } - From a4422d8faa36cd9223d8e6f6a8edd7a0d3a276e8 Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Sun, 26 Feb 2023 22:51:41 +0300 Subject: [PATCH 05/25] kphp-yaml: fix boolean emitting & parsing --- runtime/yaml.cpp | 10 +++++++-- tests/cpp/runtime/yaml-test.cpp | 40 +++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/runtime/yaml.cpp b/runtime/yaml.cpp index ca158b8475..5c477dbe12 100644 --- a/runtime/yaml.cpp +++ b/runtime/yaml.cpp @@ -9,7 +9,13 @@ void yaml_node_to_mixed(const YAML::Node &node, mixed &data, const string &sourc data.clear(); // sets data to NULL if (node.IsScalar()) { const string string_data(node.as().c_str()); - if (string_data.is_int()) { + if (string_data == string("true") || string_data == string("false")) { + if (source[node.Mark().pos] == '"' && source[node.Mark().pos + string_data.size() + 1] == '"') { + data = string_data; + } else { + data = string_data == string("true"); + } + } else if (string_data.is_int()) { if (source[node.Mark().pos] == '"' && source[node.Mark().pos + string_data.size() + 1] == '"') { data = string_data; } else { @@ -79,7 +85,7 @@ void mixed_to_string(const mixed& data, string& string_data, uint8_t nesting_lev } else if (data.is_float()) { string_data.append(data.as_double()); } else if (data.is_bool()) { - string_data.append(data.as_bool()); + string_data.append((data.as_bool()) ? "true" : "false"); } string_data.push_back('\n'); return; diff --git a/tests/cpp/runtime/yaml-test.cpp b/tests/cpp/runtime/yaml-test.cpp index 9ca273ef34..2ae8431516 100644 --- a/tests/cpp/runtime/yaml-test.cpp +++ b/tests/cpp/runtime/yaml-test.cpp @@ -81,6 +81,25 @@ TEST(yaml_test, test_yaml_vector_recursive) { ASSERT_DOUBLE_EQ(example[3][2].as_double(), result[3][2].as_double()); } +TEST(yaml_test, test_yaml_bool) { + mixed example; + example.push_back(true); + example.push_back(false); + example.push_back(string("true")); + example.push_back(string("false")); + mixed result = f$yaml_parse(f$yaml_emit(example)); + ASSERT_TRUE(result.is_array()); + ASSERT_TRUE(result.as_array().is_pseudo_vector()); + ASSERT_TRUE(result[0].is_bool()); + ASSERT_TRUE(example[0].as_bool() == result[0].as_bool()); + ASSERT_TRUE(result[1].is_bool()); + ASSERT_TRUE(example[1].as_bool() == result[1].as_bool()); + ASSERT_TRUE(result[2].is_string()); + ASSERT_TRUE(example[2].as_string() == result[2].as_string()); + ASSERT_TRUE(result[3].is_string()); + ASSERT_TRUE(example[3].as_string() == result[3].as_string()); +} + TEST(yaml_test, test_yaml_map) { mixed example; example[string("first")] = string("string"); @@ -219,6 +238,27 @@ TEST(yaml_test, test_yaml_vector_recursive_file) { ASSERT_TRUE(f$unlink(filename)); } +TEST(yaml_test, test_yaml_bool_file) { + mixed example; + example.push_back(true); + example.push_back(false); + example.push_back(string("true")); + example.push_back(string("false")); + string filename("test_yaml_bool"); + ASSERT_TRUE(f$yaml_emit_file(filename, example)); + mixed result = f$yaml_parse_file(filename); + ASSERT_TRUE(result.is_array()); + ASSERT_TRUE(result.as_array().is_pseudo_vector()); + ASSERT_TRUE(result[0].is_bool()); + ASSERT_TRUE(example[0].as_bool() == result[0].as_bool()); + ASSERT_TRUE(result[1].is_bool()); + ASSERT_TRUE(example[1].as_bool() == result[1].as_bool()); + ASSERT_TRUE(result[2].is_string()); + ASSERT_TRUE(example[2].as_string() == result[2].as_string()); + ASSERT_TRUE(result[3].is_string()); + ASSERT_TRUE(example[3].as_string() == result[3].as_string()); +} + TEST(yaml_test, test_yaml_map_file) { mixed example; example[string("first")] = string("string"); From d7322b53553e89199448f5dfb187840899053d4f Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Mon, 27 Feb 2023 21:49:06 +0300 Subject: [PATCH 06/25] kphp-yaml: add PHP test --- runtime/yaml.cpp | 3 ++- tests/cpp/runtime/yaml-test.cpp | 1 + tests/phpt/yaml/01_complex.php | 13 +++++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 tests/phpt/yaml/01_complex.php diff --git a/runtime/yaml.cpp b/runtime/yaml.cpp index 5c477dbe12..99b0fddaa3 100644 --- a/runtime/yaml.cpp +++ b/runtime/yaml.cpp @@ -130,8 +130,9 @@ bool f$yaml_emit_file(const string &filename, const mixed &data) { } string f$yaml_emit(const mixed &data) { - string string_data; + string string_data("---\n"); mixed_to_string(data, string_data); + string_data.append("..."); return string_data; } diff --git a/tests/cpp/runtime/yaml-test.cpp b/tests/cpp/runtime/yaml-test.cpp index 2ae8431516..62a469155b 100644 --- a/tests/cpp/runtime/yaml-test.cpp +++ b/tests/cpp/runtime/yaml-test.cpp @@ -257,6 +257,7 @@ TEST(yaml_test, test_yaml_bool_file) { ASSERT_TRUE(example[2].as_string() == result[2].as_string()); ASSERT_TRUE(result[3].is_string()); ASSERT_TRUE(example[3].as_string() == result[3].as_string()); + ASSERT_TRUE(f$unlink(filename)); } TEST(yaml_test, test_yaml_map_file) { diff --git a/tests/phpt/yaml/01_complex.php b/tests/phpt/yaml/01_complex.php new file mode 100644 index 0000000000..801956bd77 --- /dev/null +++ b/tests/phpt/yaml/01_complex.php @@ -0,0 +1,13 @@ +@ok +"true", true=>true, false=>false, array("false"=>"false", 10 => null))); + echo(serialize($in) . PHP_EOL); + /** @param mixed $out */ + $out = yaml_parse(yaml_emit($in)); + echo(serialize($out) . PHP_EOL); +} + +test_emit_parse_complex_mixed(); \ No newline at end of file From 48febd953f14ba6ae91ee64fb9785b865dec6b05 Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Mon, 27 Feb 2023 21:50:01 +0300 Subject: [PATCH 07/25] kphp-yaml: add yaml.so extension to kphp_run_once.py for PHP tests to execute --- tests/python/lib/kphp_run_once.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/lib/kphp_run_once.py b/tests/python/lib/kphp_run_once.py index ba078a8fe8..049c20709e 100644 --- a/tests/python/lib/kphp_run_once.py +++ b/tests/python/lib/kphp_run_once.py @@ -40,7 +40,7 @@ def _get_extensions(self): ("extension", "tokenizer.so"), ("extension", "h3.so"), ("extension", "zstd.so"), - ("extension", "ctype.so") + ("extension", "yaml.so"), ] if self._vkext_dir: From 51771d2a2249a4f87356ccb7b056d712427cb90c Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Mon, 27 Feb 2023 21:58:35 +0300 Subject: [PATCH 08/25] kphp yaml: return an accidentally deleted line (tests/python/lib/kphp_run_once.py:43 --- tests/python/lib/kphp_run_once.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/python/lib/kphp_run_once.py b/tests/python/lib/kphp_run_once.py index 049c20709e..599518d69e 100644 --- a/tests/python/lib/kphp_run_once.py +++ b/tests/python/lib/kphp_run_once.py @@ -40,7 +40,8 @@ def _get_extensions(self): ("extension", "tokenizer.so"), ("extension", "h3.so"), ("extension", "zstd.so"), - ("extension", "yaml.so"), + ("extension", "ctype.so"), + ("extension", "yaml.so") ] if self._vkext_dir: From 66131a78d8855422e55929bff86e5b02cbe6083b Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Tue, 28 Feb 2023 19:52:27 +0300 Subject: [PATCH 09/25] kphp-yaml: temporarily deleted changes in _functions.txt --- builtin-functions/_functions.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/builtin-functions/_functions.txt b/builtin-functions/_functions.txt index c17c686e01..f4f9cd13fe 100644 --- a/builtin-functions/_functions.txt +++ b/builtin-functions/_functions.txt @@ -1611,8 +1611,3 @@ class DateTimeImmutable implements DateTimeInterface { public function getOffset(): int; public function getTimestamp(): int; } - -function yaml_emit_file ($filename ::: string, $data ::: mixed) ::: bool; -function yaml_emit ($data ::: mixed) ::: string; -function yaml_parse_file ($filename ::: string, $pos ::: int = 0) ::: mixed; -function yaml_parse ($data ::: string, $pos ::: int = 0) ::: mixed; From 3c1d0ddb9f5b1a63f7b035294f9aba93e69c53ce Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Tue, 28 Feb 2023 19:55:22 +0300 Subject: [PATCH 10/25] kphp-yaml: bring back changes in _functions.txt [conflict resolved] --- builtin-functions/_functions.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/builtin-functions/_functions.txt b/builtin-functions/_functions.txt index 5c7a8f3a91..fbd8bbb003 100644 --- a/builtin-functions/_functions.txt +++ b/builtin-functions/_functions.txt @@ -1619,3 +1619,8 @@ class DateTimeImmutable implements DateTimeInterface { } function getenv(string $varname = '', bool $local_only = false): mixed; + +function yaml_emit_file ($filename ::: string, $data ::: mixed) ::: bool; +function yaml_emit ($data ::: mixed) ::: string; +function yaml_parse_file ($filename ::: string, $pos ::: int = 0) ::: mixed; +function yaml_parse ($data ::: string, $pos ::: int = 0) ::: mixed; From c45668264db4475cfdd7ca5eb2956fffe051b2fd Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Sun, 12 Mar 2023 00:27:27 +0300 Subject: [PATCH 11/25] yaml: delete obvious comments in runtime/yaml.h --- runtime/yaml.h | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/runtime/yaml.h b/runtime/yaml.h index 0a694ef596..cd03cdffbb 100644 --- a/runtime/yaml.h +++ b/runtime/yaml.h @@ -2,22 +2,10 @@ #include "runtime/kphp_core.h" -/* - * emit mixed into a yaml file - */ bool f$yaml_emit_file(const string &filename, const mixed &data); -/* - * emit mixed into a yaml string - */ string f$yaml_emit(const mixed &data); -/* - * parse yaml file into mixed - */ mixed f$yaml_parse_file(const string &filename, int pos = 0); -/* - * parse yaml string into mixed - */ -mixed f$yaml_parse(const string &data, int pos = 0); \ No newline at end of file +mixed f$yaml_parse(const string &data, int pos = 0); From a5adf3f1d0453520ba7bf8f7c6f79816bef87c9e Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Sun, 12 Mar 2023 00:28:06 +0300 Subject: [PATCH 12/25] yaml: support for empty arrays, new comments, codestyle fixes --- runtime/yaml.cpp | 131 +++++++++++++++++++++++++---------------------- 1 file changed, 69 insertions(+), 62 deletions(-) diff --git a/runtime/yaml.cpp b/runtime/yaml.cpp index 99b0fddaa3..c40fdf593e 100644 --- a/runtime/yaml.cpp +++ b/runtime/yaml.cpp @@ -5,37 +5,36 @@ #include "runtime/critical_section.h" #include "runtime/yaml.h" -void yaml_node_to_mixed(const YAML::Node &node, mixed &data, const string &source) { +/* + * convert YAML::Node to mixed after parsing a YAML document into YAML::Node + */ +static void yaml_node_to_mixed(const YAML::Node &node, mixed &data, const string &source) noexcept { data.clear(); // sets data to NULL if (node.IsScalar()) { const string string_data(node.as().c_str()); - if (string_data == string("true") || string_data == string("false")) { - if (source[node.Mark().pos] == '"' && source[node.Mark().pos + string_data.size() + 1] == '"') { - data = string_data; - } else { - data = string_data == string("true"); - } + // check whether the primitive is put in quotes in the source YAML + const bool string_data_has_quotes = (source[node.Mark().pos] == '"' && source[node.Mark().pos + string_data.size() + 1] == '"'); + // if so, it is a string + if (string_data_has_quotes) { + data = string_data; + } else if (string_data == string("true")) { + data = true; // "true" without quotes is boolean(1) + } else if (string_data == string("false")) { + data = false; // "false" without quotes is boolean(0) } else if (string_data.is_int()) { - if (source[node.Mark().pos] == '"' && source[node.Mark().pos + string_data.size() + 1] == '"') { - data = string_data; - } else { - data = string_data.to_int(); - } + data = string_data.to_int(); } else { - dl::enter_critical_section(); - auto *float_data = new double; - if (string_data.try_to_float(float_data)) { - if (source[node.Mark().pos] == '"' && source[node.Mark().pos + string_data.size() + 1] == '"') { - data = string_data; - } else { - data = *float_data; - } + double float_data = 0.0; + if (string_data.try_to_float(&float_data)) { + data = float_data; } else { data = string_data; } - delete float_data; - dl::leave_critical_section(); } + } else if (node.size() == 0 && node.IsDefined() && !node.IsNull()) { + // if node is defined, is not null or scalar and has size 0, then it is an empty array + array empty_array; + data = empty_array; } else if (node.IsSequence()) { for (auto it = node.begin(); it != node.end(); ++it) { mixed data_piece; @@ -49,69 +48,77 @@ void yaml_node_to_mixed(const YAML::Node &node, mixed &data, const string &sourc data[string(it.first.as().c_str())] = data_piece; } } + // else node is Null or Undefined, so data is Null } -string print_tabs(uint8_t nesting_level) { - string tabs; - for (uint8_t i = 0; i < 2 * nesting_level; i++) { - tabs.push_back(' '); - } - return tabs; +/* + * print tabs in quantity of nesting_level (used to print nested YAML entries) + */ +static string yaml_print_tabs(const uint8_t nesting_level) noexcept { + return string(2 * nesting_level, ' '); } -string print_key(const mixed& data_key) { +/* + * print the key of a YAML map entry + */ +static string yaml_print_key(const mixed& data_key) noexcept { if (data_key.is_string()) { return data_key.as_string(); } - return string(data_key.as_int()); // array can not be a key; bool and float keys are cast to int + return string(data_key.as_int()); // key can not be an array; bool and float keys are cast to int } -void mixed_to_string(const mixed& data, string& string_data, uint8_t nesting_level = 0) { +/* + * get a YAML representation of mixed in a string variable + */ +static void mixed_to_string(const mixed& data, string& string_data, const uint8_t nesting_level = 0) noexcept { + string buffer; if (!data.is_array()) { if (data.is_null()) { - string_data.push_back('~'); + buffer.push_back('~'); } else if (data.is_string()) { const string& string_data_piece = data.as_string(); - if (string_data_piece.size() < 2 - || (string_data_piece[0] != '"' && string_data_piece[string_data_piece.size() - 1] != '"')) { - string_data.push_back('"'); - string_data.append(string_data_piece); - string_data.push_back('"'); + // check if a string has quotes + if (string_data_piece.size() < 2 || (string_data_piece[0] != '"' && string_data_piece[string_data_piece.size() - 1] != '"')) { + // if not, put it in quotes + buffer.push_back('"'); + buffer.append(string_data_piece); + buffer.push_back('"'); } else { - string_data.append(string_data_piece); + buffer = string_data_piece; } } else if (data.is_int()) { - string_data.append(data.as_int()); + buffer.append(data.as_int()); } else if (data.is_float()) { - string_data.append(data.as_double()); + buffer.append(data.as_double()); } else if (data.is_bool()) { - string_data.append((data.as_bool()) ? "true" : "false"); + buffer = (data.as_bool()) ? string("true") : string("false"); } + string_data.append(buffer); string_data.push_back('\n'); return; } const array &data_array = data.as_array(); - if (data_array.is_pseudo_vector()) { - for (const auto &it : data_array) { - const mixed &data_piece = it.get_value(); - string_data.append(print_tabs(nesting_level)); - string_data.append("- "); - if (data_piece.is_array()) { - string_data.push_back('\n'); - } - mixed_to_string(data_piece, string_data, nesting_level + 1); + if (data_array.empty()) { + string_data.append("[]\n"); // an empty array is represented as [] in YAML + } + const bool data_array_is_vector = data_array.is_pseudo_vector(); // check if an array has keys increasing by 1 starting from 0 + for (const auto &it : data_array) { + const mixed &data_piece = it.get_value(); + buffer = yaml_print_tabs(nesting_level); + if (data_array_is_vector) { + buffer.push_back('-'); + } else { + buffer.append(yaml_print_key(it.get_key())); + buffer.push_back(':'); } - } else { - for (const auto &it : data_array) { - const mixed &data_piece = it.get_value(); - string_data.append(print_tabs(nesting_level)); - string_data.append(print_key(it.get_key())); - string_data.append(": "); - if (data_piece.is_array()) { - string_data.push_back('\n'); - } - mixed_to_string(data_piece, string_data, nesting_level + 1); + if (data_piece.is_array() && !data_piece.as_array().empty()) { + buffer.push_back('\n'); // if an element of an array is also a non-empty array, print it on the next line + } else { + buffer.push_back(' '); // if an element of an array is a primitive or an empty array, print it after a space } + string_data.append(buffer); + mixed_to_string(data_piece, string_data, nesting_level + 1); // for entries of an array, increase nesting level } } @@ -130,9 +137,9 @@ bool f$yaml_emit_file(const string &filename, const mixed &data) { } string f$yaml_emit(const mixed &data) { - string string_data("---\n"); + string string_data("---\n"); // beginning of a YAML document mixed_to_string(data, string_data); - string_data.append("..."); + string_data.append("...\n"); // ending of a YAML document return string_data; } From 47d382a1fe89daf61a4717024e8eb591432aa620 Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Sun, 12 Mar 2023 00:28:42 +0300 Subject: [PATCH 13/25] yaml: add null and empty array C++ tests --- tests/cpp/runtime/yaml-test.cpp | 34 ++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/tests/cpp/runtime/yaml-test.cpp b/tests/cpp/runtime/yaml-test.cpp index 62a469155b..88f78030c3 100644 --- a/tests/cpp/runtime/yaml-test.cpp +++ b/tests/cpp/runtime/yaml-test.cpp @@ -136,6 +136,19 @@ TEST(yaml_test, test_yaml_map_recursive) { ASSERT_DOUBLE_EQ(example[string("self")][string("third")].as_double(), result[string("self")][string("third")].as_double()); } +TEST(yaml_test, test_yaml_empty_array) { + array empty_array; + mixed result = f$yaml_parse(f$yaml_emit(empty_array)); + ASSERT_TRUE(result.is_array()); + ASSERT_TRUE(result.as_array().empty()); +} + +TEST(yaml_test, test_yaml_null) { + mixed example; + mixed result = f$yaml_parse(f$yaml_emit(example)); + ASSERT_TRUE(result.is_null()); +} + TEST(yaml_test, test_yaml_string_file) { mixed example = string("string"); string filename("test_yaml_string"); @@ -300,4 +313,23 @@ TEST(yaml_test, test_yaml_map_recursive_file) { ASSERT_TRUE(result[string("self")][string("third")].is_float()); ASSERT_DOUBLE_EQ(example[string("self")][string("third")].as_double(), result[string("self")][string("third")].as_double()); ASSERT_TRUE(f$unlink(filename)); -} \ No newline at end of file +} + +TEST(yaml_test, test_yaml_empty_array_file) { + array empty_array; + string filename("test_yaml_empty_array"); + ASSERT_TRUE(f$yaml_emit_file(filename, empty_array)); + mixed result = f$yaml_parse_file(filename); + ASSERT_TRUE(result.is_array()); + ASSERT_TRUE(result.as_array().empty()); + ASSERT_TRUE(f$unlink(filename)); +} + +TEST(yaml_test, test_yaml_null_file) { + mixed example; + string filename("test_yaml_null"); + ASSERT_TRUE(f$yaml_emit_file(filename, example)); + mixed result = f$yaml_parse_file(filename); + ASSERT_TRUE(result.is_null()); + ASSERT_TRUE(f$unlink(filename)); +} From 4d88e37be38d76433ab2fdf8174710e2c5e1a375 Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Sun, 12 Mar 2023 00:29:30 +0300 Subject: [PATCH 14/25] yaml: add PHP test for docs/_config.yml file --- tests/phpt/yaml/01_complex.php | 13 ------------- tests/phpt/yaml/01_yaml_complex.php | 13 +++++++++++++ tests/phpt/yaml/02_yaml_config_file.php | 15 +++++++++++++++ 3 files changed, 28 insertions(+), 13 deletions(-) delete mode 100644 tests/phpt/yaml/01_complex.php create mode 100644 tests/phpt/yaml/01_yaml_complex.php create mode 100644 tests/phpt/yaml/02_yaml_config_file.php diff --git a/tests/phpt/yaml/01_complex.php b/tests/phpt/yaml/01_complex.php deleted file mode 100644 index 801956bd77..0000000000 --- a/tests/phpt/yaml/01_complex.php +++ /dev/null @@ -1,13 +0,0 @@ -@ok -"true", true=>true, false=>false, array("false"=>"false", 10 => null))); - echo(serialize($in) . PHP_EOL); - /** @param mixed $out */ - $out = yaml_parse(yaml_emit($in)); - echo(serialize($out) . PHP_EOL); -} - -test_emit_parse_complex_mixed(); \ No newline at end of file diff --git a/tests/phpt/yaml/01_yaml_complex.php b/tests/phpt/yaml/01_yaml_complex.php new file mode 100644 index 0000000000..4481ff7d34 --- /dev/null +++ b/tests/phpt/yaml/01_yaml_complex.php @@ -0,0 +1,13 @@ +@ok +"true", true=>true, false=>false, array("false"=>"false", 10 => null))); + echo(serialize($in) . PHP_EOL); + /** @param mixed $out */ + $out = yaml_parse(yaml_emit($in)); + echo(serialize($out) . PHP_EOL); +} + +test_yaml_complex(); diff --git a/tests/phpt/yaml/02_yaml_config_file.php b/tests/phpt/yaml/02_yaml_config_file.php new file mode 100644 index 0000000000..30ee53c33b --- /dev/null +++ b/tests/phpt/yaml/02_yaml_config_file.php @@ -0,0 +1,15 @@ +@ok + Date: Sun, 12 Mar 2023 00:36:10 +0300 Subject: [PATCH 15/25] yaml: add comma in tests/python/lib/kphp_run_once.py:44 --- tests/python/lib/kphp_run_once.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/lib/kphp_run_once.py b/tests/python/lib/kphp_run_once.py index 599518d69e..75855c0262 100644 --- a/tests/python/lib/kphp_run_once.py +++ b/tests/python/lib/kphp_run_once.py @@ -41,7 +41,7 @@ def _get_extensions(self): ("extension", "h3.so"), ("extension", "zstd.so"), ("extension", "ctype.so"), - ("extension", "yaml.so") + ("extension", "yaml.so"), ] if self._vkext_dir: From 252cb5829dcbf487b58bf8d3cafd44183e02cdb5 Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Sun, 23 Apr 2023 19:37:00 +0300 Subject: [PATCH 16/25] yaml: minor codestyle fixes according to @unserialize review --- runtime/yaml.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/runtime/yaml.cpp b/runtime/yaml.cpp index c40fdf593e..bdf86989ce 100644 --- a/runtime/yaml.cpp +++ b/runtime/yaml.cpp @@ -45,7 +45,7 @@ static void yaml_node_to_mixed(const YAML::Node &node, mixed &data, const string for (const auto &it : node) { mixed data_piece; yaml_node_to_mixed(it.second, data_piece, source); - data[string(it.first.as().c_str())] = data_piece; + data.set_value(string(it.first.as().c_str()), data_piece); } } // else node is Null or Undefined, so data is Null @@ -61,7 +61,7 @@ static string yaml_print_tabs(const uint8_t nesting_level) noexcept { /* * print the key of a YAML map entry */ -static string yaml_print_key(const mixed& data_key) noexcept { +static string yaml_print_key(const mixed &data_key) noexcept { if (data_key.is_string()) { return data_key.as_string(); } @@ -71,13 +71,13 @@ static string yaml_print_key(const mixed& data_key) noexcept { /* * get a YAML representation of mixed in a string variable */ -static void mixed_to_string(const mixed& data, string& string_data, const uint8_t nesting_level = 0) noexcept { +static void mixed_to_string(const mixed &data, string &string_data, const uint8_t nesting_level = 0) noexcept { string buffer; if (!data.is_array()) { if (data.is_null()) { - buffer.push_back('~'); + buffer.push_back('~'); // tilda is a YAML representation of NULL } else if (data.is_string()) { - const string& string_data_piece = data.as_string(); + const string &string_data_piece = data.as_string(); // check if a string has quotes if (string_data_piece.size() < 2 || (string_data_piece[0] != '"' && string_data_piece[string_data_piece.size() - 1] != '"')) { // if not, put it in quotes @@ -101,6 +101,7 @@ static void mixed_to_string(const mixed& data, string& string_data, const uint8_ const array &data_array = data.as_array(); if (data_array.empty()) { string_data.append("[]\n"); // an empty array is represented as [] in YAML + return; } const bool data_array_is_vector = data_array.is_pseudo_vector(); // check if an array has keys increasing by 1 starting from 0 for (const auto &it : data_array) { From 17b3d3e670a80c456453e11dd379e57f44f0bd21 Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Mon, 22 May 2023 12:13:24 +0300 Subject: [PATCH 17/25] YAML: implement special characters escaping --- runtime/yaml.cpp | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/runtime/yaml.cpp b/runtime/yaml.cpp index bdf86989ce..ef5b8335cd 100644 --- a/runtime/yaml.cpp +++ b/runtime/yaml.cpp @@ -68,6 +68,37 @@ static string yaml_print_key(const mixed &data_key) noexcept { return string(data_key.as_int()); // key can not be an array; bool and float keys are cast to int } +/* + * escape special characters in a string entry + */ +static string yaml_escape(const string &data) noexcept { + string escaped_data; + for (size_t i = 0; i < data.size(); ++i) { + if (data[i] == 10) { // line feed - code 10 + escaped_data.push_back(92); // backslash + escaped_data.push_back('n'); + } else if (data[i] == 8) { // backspace - code 8 + escaped_data.push_back(92); // backslash + escaped_data.push_back('b'); + } else if (data[i] == 9) { // horizontal tab - code 9 + escaped_data.push_back(92); // backslash + escaped_data.push_back('t'); + } else if (data[i] == 11) { // vertical tab - code 11 + escaped_data.push_back(92); // backslash + escaped_data.push_back('v'); + } else if (data[i] == 34) { // double quotation mark - code 34 + escaped_data.push_back(92); // backslash + escaped_data.push_back(34); // double quotation mark + } else if (data[i] == 92) { // backslash - code 92 + escaped_data.push_back(92); + escaped_data.push_back(92); // double backslash + } else { + escaped_data.push_back(data[i]); + } + } + return escaped_data; +} + /* * get a YAML representation of mixed in a string variable */ @@ -77,16 +108,9 @@ static void mixed_to_string(const mixed &data, string &string_data, const uint8_ if (data.is_null()) { buffer.push_back('~'); // tilda is a YAML representation of NULL } else if (data.is_string()) { - const string &string_data_piece = data.as_string(); - // check if a string has quotes - if (string_data_piece.size() < 2 || (string_data_piece[0] != '"' && string_data_piece[string_data_piece.size() - 1] != '"')) { - // if not, put it in quotes - buffer.push_back('"'); - buffer.append(string_data_piece); - buffer.push_back('"'); - } else { - buffer = string_data_piece; - } + buffer.push_back('"'); // cover string entry in double quotes + buffer.append(yaml_escape(data.as_string())); // escape special characters + buffer.push_back('"'); } else if (data.is_int()) { buffer.append(data.as_int()); } else if (data.is_float()) { From 3dcf68ac31bb3501c3e9cb7779d15ab94e301154 Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Mon, 29 May 2023 13:52:40 +0300 Subject: [PATCH 18/25] YAML: add PHP tests for symbols that should be escaped --- tests/phpt/yaml/03_yaml_escaping.php | 16 ++++++++++++++++ tests/phpt/yaml/yaml_escaping.yml | 5 +++++ 2 files changed, 21 insertions(+) create mode 100644 tests/phpt/yaml/03_yaml_escaping.php create mode 100644 tests/phpt/yaml/yaml_escaping.yml diff --git a/tests/phpt/yaml/03_yaml_escaping.php b/tests/phpt/yaml/03_yaml_escaping.php new file mode 100644 index 0000000000..ee35035480 --- /dev/null +++ b/tests/phpt/yaml/03_yaml_escaping.php @@ -0,0 +1,16 @@ +@ok + Date: Tue, 19 Sep 2023 15:45:52 +0300 Subject: [PATCH 19/25] yaml: add a linebreak in test 03 --- tests/phpt/yaml/03_yaml_escaping.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/phpt/yaml/03_yaml_escaping.php b/tests/phpt/yaml/03_yaml_escaping.php index ee35035480..a548c41ec4 100644 --- a/tests/phpt/yaml/03_yaml_escaping.php +++ b/tests/phpt/yaml/03_yaml_escaping.php @@ -13,4 +13,5 @@ function test_yaml_escaping() { echo(serialize($out) . PHP_EOL); } -test_yaml_escaping(); \ No newline at end of file +test_yaml_escaping(); + From ecf7065f21669cfde4f47480004b1bfbab4173d8 Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Sun, 1 Oct 2023 15:28:04 +0300 Subject: [PATCH 20/25] yaml: make yaml an extension, turning on by CMake option `-DYAML=ON --- cmake/external-libraries.cmake | 3 +++ runtime/runtime.cmake | 7 ++++++- runtime/{ => yaml}/yaml.cpp | 2 +- runtime/{ => yaml}/yaml.h | 0 tests/cpp/runtime/{ => extensions-tests}/yaml-test.cpp | 2 +- tests/cpp/runtime/runtime-tests.cmake | 7 ++++++- 6 files changed, 17 insertions(+), 4 deletions(-) rename runtime/{ => yaml}/yaml.cpp (99%) rename runtime/{ => yaml}/yaml.h (100%) rename tests/cpp/runtime/{ => extensions-tests}/yaml-test.cpp (99%) diff --git a/cmake/external-libraries.cmake b/cmake/external-libraries.cmake index 44a6734f28..6d208de75b 100644 --- a/cmake/external-libraries.cmake +++ b/cmake/external-libraries.cmake @@ -74,3 +74,6 @@ if(APPLE) add_definitions(-DEPOLL_SHIM_LIB_DIR="${epoll_BINARY_DIR}/src") set(EPOLL_SHIM_LIB epoll-shim) endif() + +option(YAML "build yaml" OFF) +cmake_print_variables(YAML) diff --git a/runtime/runtime.cmake b/runtime/runtime.cmake index e62ad28db0..521ca8b9b7 100644 --- a/runtime/runtime.cmake +++ b/runtime/runtime.cmake @@ -49,6 +49,11 @@ prepend(KPHP_RUNTIME_PDO_PGSQL_SOURCES pdo/pgsql/ pgsql_pdo_emulated_statement.cpp) endif() +if (YAML) +prepend(KPHP_RUNTIME_YAML_SOURCES /yaml/ + yaml.cpp) +endif() + prepend(KPHP_RUNTIME_SOURCES ${BASE_DIR}/runtime/ ${KPHP_RUNTIME_DATETIME_SOURCES} ${KPHP_RUNTIME_MEMORY_RESOURCE_SOURCES} @@ -58,6 +63,7 @@ prepend(KPHP_RUNTIME_SOURCES ${BASE_DIR}/runtime/ ${KPHP_RUNTIME_PDO_SOURCES} ${KPHP_RUNTIME_PDO_MYSQL_SOURCES} ${KPHP_RUNTIME_PDO_PGSQL_SOURCES} + ${KPHP_RUNTIME_YAML_SOURCES} allocator.cpp array_functions.cpp bcmath.cpp @@ -121,7 +127,6 @@ prepend(KPHP_RUNTIME_SOURCES ${BASE_DIR}/runtime/ vkext.cpp vkext_stats.cpp ffi.cpp - yaml.cpp zlib.cpp zstd.cpp) diff --git a/runtime/yaml.cpp b/runtime/yaml/yaml.cpp similarity index 99% rename from runtime/yaml.cpp rename to runtime/yaml/yaml.cpp index ef5b8335cd..8407546f79 100644 --- a/runtime/yaml.cpp +++ b/runtime/yaml/yaml.cpp @@ -3,7 +3,7 @@ #include "runtime/optional.h" #include "runtime/streams.h" #include "runtime/critical_section.h" -#include "runtime/yaml.h" +#include "yaml.h" /* * convert YAML::Node to mixed after parsing a YAML document into YAML::Node diff --git a/runtime/yaml.h b/runtime/yaml/yaml.h similarity index 100% rename from runtime/yaml.h rename to runtime/yaml/yaml.h diff --git a/tests/cpp/runtime/yaml-test.cpp b/tests/cpp/runtime/extensions-tests/yaml-test.cpp similarity index 99% rename from tests/cpp/runtime/yaml-test.cpp rename to tests/cpp/runtime/extensions-tests/yaml-test.cpp index 88f78030c3..7baaf3eb40 100644 --- a/tests/cpp/runtime/yaml-test.cpp +++ b/tests/cpp/runtime/extensions-tests/yaml-test.cpp @@ -1,7 +1,7 @@ #include #include "runtime/files.h" -#include "runtime/yaml.h" +#include "runtime/yaml/yaml.h" TEST(yaml_test, test_yaml_string) { mixed example = string("string"); diff --git a/tests/cpp/runtime/runtime-tests.cmake b/tests/cpp/runtime/runtime-tests.cmake index daefbed534..83d7322efe 100644 --- a/tests/cpp/runtime/runtime-tests.cmake +++ b/tests/cpp/runtime/runtime-tests.cmake @@ -1,4 +1,10 @@ +if (YAML) +prepend(RUNTIME_TESTS_YAML_SOURCES /extensions-tests/ + yaml-test.cpp) +endif() + prepend(RUNTIME_TESTS_SOURCES ${BASE_DIR}/tests/cpp/runtime/ + ${RUNTIME_TESTS_YAML_SOURCES} _runtime-tests-env.cpp allocator-malloc-replacement-test.cpp array-test.cpp @@ -21,7 +27,6 @@ prepend(RUNTIME_TESTS_SOURCES ${BASE_DIR}/tests/cpp/runtime/ memory_resource/unsynchronized_pool_resource-test.cpp string-list-test.cpp string-test.cpp - yaml-test.cpp zstd-test.cpp) allow_deprecated_declarations_for_apple(${BASE_DIR}/tests/cpp/runtime/inter-process-mutex-test.cpp) From 361addabc52b3b102a3d0286bc3110763c0e49f1 Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Sat, 7 Oct 2023 13:03:04 +0300 Subject: [PATCH 21/25] yaml: minor changes --- runtime/runtime.cmake | 2 +- runtime/streams.h | 2 +- runtime/{yaml => }/yaml.cpp | 1 - runtime/{yaml => }/yaml.h | 0 tests/cpp/runtime/extensions-tests/yaml-test.cpp | 2 +- 5 files changed, 3 insertions(+), 4 deletions(-) rename runtime/{yaml => }/yaml.cpp (99%) rename runtime/{yaml => }/yaml.h (100%) diff --git a/runtime/runtime.cmake b/runtime/runtime.cmake index 521ca8b9b7..6ee237f8b2 100644 --- a/runtime/runtime.cmake +++ b/runtime/runtime.cmake @@ -50,7 +50,7 @@ prepend(KPHP_RUNTIME_PDO_PGSQL_SOURCES pdo/pgsql/ endif() if (YAML) -prepend(KPHP_RUNTIME_YAML_SOURCES /yaml/ +prepend(KPHP_RUNTIME_YAML_SOURCES / yaml.cpp) endif() diff --git a/runtime/streams.h b/runtime/streams.h index b95321e5b5..7c59084ccd 100644 --- a/runtime/streams.h +++ b/runtime/streams.h @@ -6,7 +6,7 @@ #include "runtime/kphp_core.h" -using Stream =mixed; +using Stream = mixed; constexpr int64_t STREAM_SET_BLOCKING_OPTION = 0; diff --git a/runtime/yaml/yaml.cpp b/runtime/yaml.cpp similarity index 99% rename from runtime/yaml/yaml.cpp rename to runtime/yaml.cpp index 8407546f79..3952d8e40d 100644 --- a/runtime/yaml/yaml.cpp +++ b/runtime/yaml.cpp @@ -2,7 +2,6 @@ #include "runtime/optional.h" #include "runtime/streams.h" -#include "runtime/critical_section.h" #include "yaml.h" /* diff --git a/runtime/yaml/yaml.h b/runtime/yaml.h similarity index 100% rename from runtime/yaml/yaml.h rename to runtime/yaml.h diff --git a/tests/cpp/runtime/extensions-tests/yaml-test.cpp b/tests/cpp/runtime/extensions-tests/yaml-test.cpp index 7baaf3eb40..88f78030c3 100644 --- a/tests/cpp/runtime/extensions-tests/yaml-test.cpp +++ b/tests/cpp/runtime/extensions-tests/yaml-test.cpp @@ -1,7 +1,7 @@ #include #include "runtime/files.h" -#include "runtime/yaml/yaml.h" +#include "runtime/yaml.h" TEST(yaml_test, test_yaml_string) { mixed example = string("string"); From 29797e4abf2fa18e7541133a938e50f56cb4bdd3 Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Thu, 12 Oct 2023 13:16:48 +0300 Subject: [PATCH 22/25] yaml: move cpp tests to common dir, delete `extensions-tests` dir --- tests/cpp/runtime/runtime-tests.cmake | 2 +- tests/cpp/runtime/{extensions-tests => }/yaml-test.cpp | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/cpp/runtime/{extensions-tests => }/yaml-test.cpp (100%) diff --git a/tests/cpp/runtime/runtime-tests.cmake b/tests/cpp/runtime/runtime-tests.cmake index 83d7322efe..cf1401fdf5 100644 --- a/tests/cpp/runtime/runtime-tests.cmake +++ b/tests/cpp/runtime/runtime-tests.cmake @@ -1,5 +1,5 @@ if (YAML) -prepend(RUNTIME_TESTS_YAML_SOURCES /extensions-tests/ +prepend(RUNTIME_TESTS_YAML_SOURCES / yaml-test.cpp) endif() diff --git a/tests/cpp/runtime/extensions-tests/yaml-test.cpp b/tests/cpp/runtime/yaml-test.cpp similarity index 100% rename from tests/cpp/runtime/extensions-tests/yaml-test.cpp rename to tests/cpp/runtime/yaml-test.cpp From 12b31da399ec8c243bcfd2dd11e4f84e9f2f94df Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Mon, 23 Oct 2023 02:18:36 +0300 Subject: [PATCH 23/25] yaml: fix issues after @unserialize review, mostly in implementation of char escaping --- builtin-functions/_functions.txt | 8 ++-- runtime/yaml.cpp | 82 +++++++++++++++++--------------- 2 files changed, 48 insertions(+), 42 deletions(-) diff --git a/builtin-functions/_functions.txt b/builtin-functions/_functions.txt index b993b6788d..e25f7e3187 100644 --- a/builtin-functions/_functions.txt +++ b/builtin-functions/_functions.txt @@ -1654,7 +1654,7 @@ class DateTimeImmutable implements DateTimeInterface { function getenv(string $varname = '', bool $local_only = false): mixed; -function yaml_emit_file ($filename ::: string, $data ::: mixed) ::: bool; -function yaml_emit ($data ::: mixed) ::: string; -function yaml_parse_file ($filename ::: string, $pos ::: int = 0) ::: mixed; -function yaml_parse ($data ::: string, $pos ::: int = 0) ::: mixed; +function yaml_emit_file(string $filename, mixed $data): bool; +function yaml_emit(mixed $data): string; +function yaml_parse_file(string $filename, int $pos = 0): mixed; +function yaml_parse(string $data, int $pos = 0): mixed; diff --git a/runtime/yaml.cpp b/runtime/yaml.cpp index 3952d8e40d..02fb225ef1 100644 --- a/runtime/yaml.cpp +++ b/runtime/yaml.cpp @@ -2,7 +2,7 @@ #include "runtime/optional.h" #include "runtime/streams.h" -#include "yaml.h" +#include "runtime/yaml.h" /* * convert YAML::Node to mixed after parsing a YAML document into YAML::Node @@ -12,7 +12,9 @@ static void yaml_node_to_mixed(const YAML::Node &node, mixed &data, const string if (node.IsScalar()) { const string string_data(node.as().c_str()); // check whether the primitive is put in quotes in the source YAML - const bool string_data_has_quotes = (source[node.Mark().pos] == '"' && source[node.Mark().pos + string_data.size() + 1] == '"'); + const bool string_data_has_quotes = + (source[node.Mark().pos] == '"' && source[node.Mark().pos + string_data.size() + 1] == '"') || + (source[node.Mark().pos] == '\'' && source[node.Mark().pos + string_data.size() + 1] == '\''); // if so, it is a string if (string_data_has_quotes) { data = string_data; @@ -64,7 +66,8 @@ static string yaml_print_key(const mixed &data_key) noexcept { if (data_key.is_string()) { return data_key.as_string(); } - return string(data_key.as_int()); // key can not be an array; bool and float keys are cast to int + // key can not be an array; bool and float keys are cast to int + return string(data_key.as_int()); } /* @@ -73,26 +76,27 @@ static string yaml_print_key(const mixed &data_key) noexcept { static string yaml_escape(const string &data) noexcept { string escaped_data; for (size_t i = 0; i < data.size(); ++i) { - if (data[i] == 10) { // line feed - code 10 - escaped_data.push_back(92); // backslash + const char& current_char = data[i]; + if (current_char == '\n') { // line feed + escaped_data.push_back('\\'); escaped_data.push_back('n'); - } else if (data[i] == 8) { // backspace - code 8 - escaped_data.push_back(92); // backslash + } else if (current_char == '\b') { // backspace + escaped_data.push_back('\\'); escaped_data.push_back('b'); - } else if (data[i] == 9) { // horizontal tab - code 9 - escaped_data.push_back(92); // backslash + } else if (current_char == '\t') { // horizontal tab + escaped_data.push_back('\\'); escaped_data.push_back('t'); - } else if (data[i] == 11) { // vertical tab - code 11 - escaped_data.push_back(92); // backslash + } else if (current_char == '\v') { // vertical tab + escaped_data.push_back('\\'); escaped_data.push_back('v'); - } else if (data[i] == 34) { // double quotation mark - code 34 - escaped_data.push_back(92); // backslash - escaped_data.push_back(34); // double quotation mark - } else if (data[i] == 92) { // backslash - code 92 - escaped_data.push_back(92); - escaped_data.push_back(92); // double backslash + } else if (current_char == '\"') { + escaped_data.push_back('\\'); + escaped_data.push_back('"'); + } else if (current_char == '\\') { + escaped_data.push_back('\\'); + escaped_data.push_back('\\'); } else { - escaped_data.push_back(data[i]); + escaped_data.push_back(current_char); } } return escaped_data; @@ -102,22 +106,21 @@ static string yaml_escape(const string &data) noexcept { * get a YAML representation of mixed in a string variable */ static void mixed_to_string(const mixed &data, string &string_data, const uint8_t nesting_level = 0) noexcept { - string buffer; if (!data.is_array()) { if (data.is_null()) { - buffer.push_back('~'); // tilda is a YAML representation of NULL + string_data.push_back('~'); // tilda is a YAML representation of NULL } else if (data.is_string()) { - buffer.push_back('"'); // cover string entry in double quotes - buffer.append(yaml_escape(data.as_string())); // escape special characters - buffer.push_back('"'); + string_data.push_back('"'); // cover string entry in double quotes + string_data.append(yaml_escape(data.as_string())); // escape special characters + string_data.push_back('"'); } else if (data.is_int()) { - buffer.append(data.as_int()); + string_data.append(data.as_int()); } else if (data.is_float()) { - buffer.append(data.as_double()); + string_data.append(data.as_double()); } else if (data.is_bool()) { - buffer = (data.as_bool()) ? string("true") : string("false"); + const string bool_repr = (data.as_bool()) ? string("true") : string("false"); + string_data.append(bool_repr); } - string_data.append(buffer); string_data.push_back('\n'); return; } @@ -126,23 +129,26 @@ static void mixed_to_string(const mixed &data, string &string_data, const uint8_ string_data.append("[]\n"); // an empty array is represented as [] in YAML return; } - const bool data_array_is_vector = data_array.is_pseudo_vector(); // check if an array has keys increasing by 1 starting from 0 + // check if an array has keys increasing by 1 starting from 0 + const bool data_array_is_vector = data_array.is_pseudo_vector(); for (const auto &it : data_array) { const mixed &data_piece = it.get_value(); - buffer = yaml_print_tabs(nesting_level); + string_data.append(yaml_print_tabs(nesting_level)); if (data_array_is_vector) { - buffer.push_back('-'); + string_data.push_back('-'); } else { - buffer.append(yaml_print_key(it.get_key())); - buffer.push_back(':'); + string_data.append(yaml_print_key(it.get_key())); + string_data.push_back(':'); } if (data_piece.is_array() && !data_piece.as_array().empty()) { - buffer.push_back('\n'); // if an element of an array is also a non-empty array, print it on the next line + // if an element of an array is also a non-empty array, print it on the next line + string_data.push_back('\n'); } else { - buffer.push_back(' '); // if an element of an array is a primitive or an empty array, print it after a space + // if an element of an array is a primitive or an empty array, print it after a space + string_data.push_back(' '); } - string_data.append(buffer); - mixed_to_string(data_piece, string_data, nesting_level + 1); // for entries of an array, increase nesting level + // for entries of an array, increase nesting level + mixed_to_string(data_piece, string_data, nesting_level + 1); } } @@ -170,12 +176,12 @@ string f$yaml_emit(const mixed &data) { mixed f$yaml_parse_file(const string &filename, int pos) { if (filename.empty()) { php_warning("Filename cannot be empty"); - return {}; + return false; } Optional data = f$file_get_contents(filename); if (data.is_false()) { php_warning("Error while reading file \"%s\"", filename.c_str()); - return {}; + return false; } return f$yaml_parse(data.ref(), pos); } From 97474f64c6563f9fbab69834e54d8c9de5a626bf Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Sun, 29 Oct 2023 15:45:09 +0300 Subject: [PATCH 24/25] yaml: little codestyle fixes --- runtime/streams.h | 2 +- runtime/yaml.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/runtime/streams.h b/runtime/streams.h index 7c59084ccd..b95321e5b5 100644 --- a/runtime/streams.h +++ b/runtime/streams.h @@ -6,7 +6,7 @@ #include "runtime/kphp_core.h" -using Stream = mixed; +using Stream =mixed; constexpr int64_t STREAM_SET_BLOCKING_OPTION = 0; diff --git a/runtime/yaml.cpp b/runtime/yaml.cpp index 02fb225ef1..0d63378250 100644 --- a/runtime/yaml.cpp +++ b/runtime/yaml.cpp @@ -55,7 +55,7 @@ static void yaml_node_to_mixed(const YAML::Node &node, mixed &data, const string /* * print tabs in quantity of nesting_level (used to print nested YAML entries) */ -static string yaml_print_tabs(const uint8_t nesting_level) noexcept { +static string yaml_print_tabs(const int nesting_level) noexcept { return string(2 * nesting_level, ' '); } @@ -105,7 +105,7 @@ static string yaml_escape(const string &data) noexcept { /* * get a YAML representation of mixed in a string variable */ -static void mixed_to_string(const mixed &data, string &string_data, const uint8_t nesting_level = 0) noexcept { +static void mixed_to_string(const mixed &data, string &string_data, const int nesting_level = 0) noexcept { if (!data.is_array()) { if (data.is_null()) { string_data.push_back('~'); // tilda is a YAML representation of NULL From c07c1aa05a4f1423abdadaecd5c6628a924111a5 Mon Sep 17 00:00:00 2001 From: Nikolay Osintsev Date: Wed, 15 Nov 2023 00:42:26 +0300 Subject: [PATCH 25/25] yaml: add test for strings in single quotes --- tests/phpt/yaml/yaml_escaping.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/phpt/yaml/yaml_escaping.yml b/tests/phpt/yaml/yaml_escaping.yml index 54e5ac4d05..486810f595 100644 --- a/tests/phpt/yaml/yaml_escaping.yml +++ b/tests/phpt/yaml/yaml_escaping.yml @@ -1,5 +1,8 @@ --- -quote: "'single'quotes'are'not'escaped'double\"quotes\"are\"escaped\"" -newline: "1\n2\n3\n4\n5\nnewlines" -other: "backslash\\horizontal tab\tvertical tab\vevery\b last\b symbol\b is\b erased\b" +string_in_double_quotes: "'single'quotes'are'not'escaped'double\"quotes\"are\"escaped\"" +string_in_single_quotes: 'string inside single quotes' +string_without_quotes: this is a string +number_in_single_quotes_i_string: '12345' +newline_symbols: "1\n2\n3\n4\n5\nnewlines" +string_with_escaped_symbols: "backslash\\horizontal tab\tvertical tab\vevery\b last\b symbol\b is\b erased\b" ...