diff --git a/.gitlab/tests/appsec.yml b/.gitlab/tests/appsec.yml index fb17a41b7f5..7794b987352 100644 --- a/.gitlab/tests/appsec.yml +++ b/.gitlab/tests/appsec.yml @@ -51,3 +51,10 @@ appsec threats fastapi: variables: SUITE_NAME: "appsec_threats_fastapi" retry: 2 + +appsec iast native: + extends: .test_base_hatch + parallel: 6 + variables: + SUITE_NAME: "appsec_iast_native" + retry: 2 diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectsOsPath.cpp b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectsOsPath.cpp index 4fb79ceee24..b1bbafb0317 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectsOsPath.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectsOsPath.cpp @@ -125,7 +125,7 @@ api_ospathbasename_aspect(const StrType& path) apply_list.append(filler_str); apply_list.append(result_o); - set_ranges_on_splitted(path, ranges, apply_list, tx_map, false); + set_ranges_on_splitted(path, ranges, apply_list, tx_map, true); return apply_list[1]; }); } diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp index 23e43332f18..760312d958c 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp @@ -2,7 +2,6 @@ #include "Initializer/Initializer.h" #include #include -#include using namespace pybind11::literals; namespace py = pybind11; @@ -44,7 +43,11 @@ as_formatted_evidence(const string& text, const optional& tag_mapping_mode, const optional& new_ranges) { - if (text_ranges.empty()) { + if (const auto tx_map = Initializer::get_tainting_map(); !tx_map) { + return text; + } + + if (text_ranges.empty() or text.empty()) { return text; } vector res_vector; @@ -55,20 +58,23 @@ as_formatted_evidence(const string& text, for (const auto& taint_range : text_ranges) { string content; - if (!tag_mapping_mode) { + if (!tag_mapping_mode or tag_mapping_mode.value() == TagMappingMode::Normal) { content = get_default_content(taint_range); } else switch (*tag_mapping_mode) { - case TagMappingMode::Mapper: + case TagMappingMode::Mapper: { content = to_string(taint_range->get_hash()); break; - case TagMappingMode::Mapper_Replace: + } + case TagMappingMode::Mapper_Replace: { content = mapper_replace(taint_range, new_ranges); break; + } default: { // Nothing } } + const auto tag = get_tag(content); const auto range_end = taint_range->start + taint_range->length; @@ -91,21 +97,6 @@ as_formatted_evidence(const string& text, return oss.str(); } -template -StrType -all_as_formatted_evidence(const StrType& text, TagMappingMode tag_mapping_mode) -{ - TaintRangeRefs text_ranges = api_get_ranges(text); - return StrType(as_formatted_evidence(AnyTextObjectToString(text), text_ranges, tag_mapping_mode, nullopt)); -} - -template -StrType -int_as_formatted_evidence(const StrType& text, TaintRangeRefs& text_ranges, TagMappingMode tag_mapping_mode) -{ - return StrType(as_formatted_evidence(AnyTextObjectToString(text), text_ranges, tag_mapping_mode, nullopt)); -} - template StrType api_as_formatted_evidence(const StrType& text, @@ -113,6 +104,10 @@ api_as_formatted_evidence(const StrType& text, const optional& tag_mapping_mode, const optional& new_ranges) { + if (const auto tx_map = Initializer::get_tainting_map(); !tx_map) { + return text; + } + TaintRangeRefs _ranges; if (!text_ranges) { _ranges = api_get_ranges(text); @@ -122,20 +117,6 @@ api_as_formatted_evidence(const StrType& text, return StrType(as_formatted_evidence(AnyTextObjectToString(text), _ranges, tag_mapping_mode, new_ranges)); } -vector -split_taints(const string& str_to_split) -{ - const std::regex rgx(R"((:\+-(<[0-9.a-z\-]+>)?|(<[0-9.a-z\-]+>)?-\+:))"); - std::sregex_token_iterator iter(str_to_split.begin(), str_to_split.end(), rgx, { -1, 0 }); - vector res; - - for (const std::sregex_token_iterator end; iter != end; ++iter) { - res.push_back(*iter); - } - - return res; -} - py::bytearray api_convert_escaped_text_to_taint_text(const py::bytearray& taint_escaped_text, const TaintRangeRefs& ranges_orig) { @@ -200,22 +181,6 @@ api_convert_escaped_text_to_taint_text(PyObject* taint_escaped_text, } } -unsigned long int -getNum(const std::string& s) -{ - unsigned int n = -1; - try { - n = std::stoul(s, nullptr, 10); - if (errno != 0) { - PyErr_Print(); - } - } catch (std::exception&) { - // throw std::invalid_argument("Value is too big"); - PyErr_Print(); - } - return n; -} - template std::tuple convert_escaped_text_to_taint_text(const StrType& taint_escaped_text, const TaintRangeRefs& ranges_orig) @@ -327,49 +292,49 @@ set_ranges_on_splitted(const py::object& source_str, const TaintRangeMapTypePtr& tx_map, bool include_separator) { + RANGE_START offset = 0; bool some_set = false; - // Some quick shortcuts if (source_ranges.empty() or py::len(split_result) == 0 or py::len(source_str) == 0 or not tx_map or tx_map->empty()) { return false; } - RANGE_START offset = 0; - auto c_source_str = py::cast(source_str); - const auto separator_increase = static_cast(not include_separator); - for (const auto& item : split_result) { if (not is_text(item.ptr()) or py::len(item) == 0) { continue; } - auto c_item = py::cast(item); TaintRangeRefs item_ranges; + RANGE_START part_len = py::len(item); + RANGE_START part_start = offset; + RANGE_START part_end = part_start + part_len; - // Find the item in the source_str. - const auto start = static_cast(c_source_str.find(c_item, offset)); - if (start == -1) { - continue; - } - const auto end = static_cast(start + c_item.length()); - - // Find what source_ranges match these positions and create a new range with the start and len updated. + // bool first = true; for (const auto& range : source_ranges) { - if (const auto range_end_abs = range->start + range->length; range->start < end && range_end_abs > start) { - // Create a new range with the updated start - const auto new_range_start = std::max(range->start - offset, 0L); - const auto new_range_length = - std::min(end - start, (range->length - std::max(0L, offset - range->start))); - item_ranges.emplace_back( - initializer->allocate_taint_range(new_range_start, new_range_length, range->source)); + RANGE_START range_start = range->start; + RANGE_START range_end = range->start + range->length; + + // Check for overlap + if (range_start < part_end && range_end > part_start) { + RANGE_START new_start = std::max(range_start - part_start, 0L); + RANGE_START new_end = std::min(range_end - part_start, part_len); + RANGE_START new_length = std::min(new_end - new_start, part_len); + + if (new_length > 0) { + item_ranges.emplace_back(initializer->allocate_taint_range(new_start, new_length, range->source)); + } } } + if (not item_ranges.empty()) { set_ranges(item.ptr(), item_ranges, tx_map); some_set = true; } + offset += part_len; - offset += py::len(item) + separator_increase; + if (!include_separator) { + offset += 1; + } } return some_set; @@ -389,22 +354,6 @@ api_set_ranges_on_splitted(const StrType& source_str, return set_ranges_on_splitted(source_str, source_ranges, split_result, tx_map, include_separator); } -py::object -parse_params(size_t position, - const char* keyword_name, - const py::object& default_value, - const py::args& args, - const py::kwargs& kwargs) -{ - if (args.size() >= position + 1) { - return args[position]; - } - if (kwargs && kwargs.contains(keyword_name)) { - return kwargs[keyword_name]; - } - return default_value; -} - bool has_pyerr() { @@ -470,17 +419,6 @@ pyexport_aspect_helpers(py::module& m) "split_result"_a, // cppcheck-suppress assignBoolToPointer "include_separator"_a = false); - m.def("_all_as_formatted_evidence", - &all_as_formatted_evidence, - "text"_a, - "tag_mapping_function"_a = nullopt, - py::return_value_policy::move); - m.def("_int_as_formatted_evidence", - &int_as_formatted_evidence, - "text"_a, - "text_ranges"_a = nullopt, - "tag_mapping_function"_a = nullopt, - py::return_value_policy::move); m.def("as_formatted_evidence", &api_as_formatted_evidence, "text"_a, diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h index 200c3cd70c5..9341cd24b0e 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h @@ -2,7 +2,9 @@ #include #include +#include +#include "Initializer/Initializer.h" #include "TaintTracking/TaintRange.h" using namespace pybind11::literals; @@ -20,11 +22,24 @@ api_common_replace(const py::str& string_method, template StrType -all_as_formatted_evidence(const StrType& text, TagMappingMode tag_mapping_mode); +all_as_formatted_evidence(const StrType& text, TagMappingMode tag_mapping_mode) +{ + if (const auto tx_map = Initializer::get_tainting_map(); !tx_map) { + return text; + } + TaintRangeRefs text_ranges = api_get_ranges(text); + return StrType(as_formatted_evidence(AnyTextObjectToString(text), text_ranges, tag_mapping_mode, nullopt)); +} template StrType -int_as_formatted_evidence(const StrType& text, TaintRangeRefs& text_ranges, TagMappingMode tag_mapping_mode); +int_as_formatted_evidence(const StrType& text, TaintRangeRefs& text_ranges, TagMappingMode tag_mapping_mode) +{ + if (const auto tx_map = Initializer::get_tainting_map(); !tx_map) { + return text; + } + return StrType(as_formatted_evidence(AnyTextObjectToString(text), text_ranges, tag_mapping_mode, nullopt)); +} string as_formatted_evidence(const string& text, @@ -93,11 +108,10 @@ inline string get_tag(const string& content) { if (content.empty()) { - return string(EVIDENCE_MARKS::BLANK); + return { EVIDENCE_MARKS::BLANK }; } - auto result = string(EVIDENCE_MARKS::LESS) + content + string(EVIDENCE_MARKS::GREATER); - return result; + return string(EVIDENCE_MARKS::LESS) + content + string(EVIDENCE_MARKS::GREATER); } inline string @@ -112,26 +126,56 @@ get_default_content(const TaintRangePtr& taint_range) // TODO OPTIMIZATION: check if we can use instead a struct object with range_guid_map, new_ranges and default members so // we dont have to get the keys by string +/** + * @brief Replaces a taint range with a new range from the provided dictionary. + * + * This function takes a `TaintRangePtr` and an optional dictionary of new ranges. + * If the `taint_range` is found in the dictionary, it is replaced with the corresponding new range. + * If the `taint_range` is not found or if `new_ranges` is null, an empty string is returned. + * + * @param taint_range A shared pointer to the original taint range. + * @param new_ranges An optional dictionary containing new taint ranges. + * @return A string representation of the hash of the new taint range if replaced, otherwise an empty string. + */ inline string mapper_replace(const TaintRangePtr& taint_range, const optional& new_ranges) { - if (!taint_range or !new_ranges) { + + if (!taint_range or !new_ranges.has_value() or py::len(new_ranges.value()) == 0) { return {}; } + + const py::dict& new_ranges_value = new_ranges.value(); py::object o = py::cast(taint_range); - if (!new_ranges->contains(o)) { + if (!new_ranges_value.contains(o)) { return {}; } const TaintRange new_range = py::cast((*new_ranges)[o]); return to_string(new_range.get_hash()); } +// FIXME: maybe using an "unsigned" -1 as flag is not the best idea... +inline unsigned long int +getNum(const std::string& s) +{ + unsigned long int n = -1; + try { + n = std::stoul(s, nullptr, 10); + if (errno != 0) { + PyErr_Print(); + } + } catch (std::exception&) { + // throw std::invalid_argument("Value is too big"); + PyErr_Print(); + } + return n; +} + inline PyObject* process_flag_added_args(PyObject* orig_function, const int flag_added_args, PyObject* args, PyObject* kwargs) { // If orig_function is not None and not the built-in str, bytes, or bytearray, slice args - if (const auto orig_function_type = Py_TYPE(orig_function); orig_function != Py_None && orig_function_type != &PyUnicode_Type && orig_function_type != &PyByteArray_Type && orig_function_type != &PyBytes_Type) { @@ -162,6 +206,75 @@ process_flag_added_args(PyObject* orig_function, const int flag_added_args, PyOb return args; } +/** + * @brief Splits a string containing taint markers into its textual components and the markers. + * + * This function takes a string that contains special taint markers (e.g., `:+-<...>-+:`) and splits it + * into separate components: the plain text parts and the taint markers. The markers represent taint information + * surrounding sections of the string, and the result is a vector where both text and markers are included as separate + * elements. + * + * @param str_to_split The input string containing taint markers. + * + * @return A vector of strings where each element is either a part of the original text or a taint marker. + * + * @example + * std::string tainted_str = "This :+-<123>-+:is a :+-<456>-+:test."; + * std::vector result = split_taints(tainted_str); + * // result will be: ["This ", ":+-<123>-+:", "is a ", ":+-<456>-+:", "test."] + */ +inline vector +split_taints(const string& str_to_split) +{ + const std::regex rgx(R"((:\+-(<[0-9.a-z\-]+>)?|(<[0-9.a-z\-]+>)?-\+:))"); + std::sregex_token_iterator iter(str_to_split.begin(), str_to_split.end(), rgx, { -1, 0 }); + vector res; + + for (const std::sregex_token_iterator end; iter != end; ++iter) { + res.push_back(*iter); + } + + return res; +} + +/** + * @brief Retrieves a parameter from either the positional arguments, keyword arguments, or returns a default value. + * + * This function checks if a value is provided in the positional arguments (`args`) at the specified position. + * If not found, it checks the keyword arguments (`kwargs`) for the specified key. If neither is found, + * it returns the default value provided. + * + * @param position The position in the positional arguments (`args`) to check. + * @param keyword_name The name of the keyword to search for in the keyword arguments (`kwargs`). + * @param default_value The default value to return if the argument is not found in either `args` or `kwargs`. + * @param args The list of positional arguments. + * @param kwargs The dictionary of keyword arguments. + * + * @return The parameter found in the positional arguments, keyword arguments, or the default value if none is found. + * + * @example + * py::args args = py::make_tuple(42); + * py::kwargs kwargs; + * py::object default_value = py::int_(0); + * py::object result = parse_params(0, "key", default_value, args, kwargs); + * // In this case, the result will be 42 (the positional argument). + */ +inline py::object +parse_params(size_t position, + const char* keyword_name, + const py::object& default_value, + const py::args& args, + const py::kwargs& kwargs) +{ + if (args.size() >= position + 1) { + return args[position]; + } + if (kwargs && kwargs.contains(keyword_name)) { + return kwargs[keyword_name]; + } + return default_value; +} + void pyexport_aspect_helpers(py::module& m); diff --git a/ddtrace/appsec/_iast/_taint_tracking/CMakeLists.txt b/ddtrace/appsec/_iast/_taint_tracking/CMakeLists.txt index 9352a431ba7..29061150d6d 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/CMakeLists.txt +++ b/ddtrace/appsec/_iast/_taint_tracking/CMakeLists.txt @@ -3,6 +3,7 @@ include(FetchContent) set(APP_NAME _native) option(BUILD_MACOS "Build for MacOS" OFF) +option(NATIVE_TESTING "Load test subdirectories and targets" ON) project(${APP_NAME}) @@ -63,6 +64,9 @@ message(STATUS "Python_EXECUTABLE = ${Python_EXECUTABLE}") #message(STATUS "ICU_INCLUDE_DIRS = ${ICU_INCLUDE_DIRS}") add_subdirectory(_vendor/pybind11) +if (NATIVE_TESTING) + add_subdirectory(tests EXCLUDE_FROM_ALL) +endif () pybind11_add_module(_native SHARED ${SOURCE_FILES} ${HEADER_FILES}) get_filename_component(PARENT_DIR ${CMAKE_CURRENT_LIST_DIR} DIRECTORY) diff --git a/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.cpp b/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.cpp index b21e8102345..226f66534f0 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.cpp @@ -10,14 +10,6 @@ using namespace pybind11::literals; using namespace std; -#define GET_HASH_KEY(hash) (hash & 0xFFFFFF) - -typedef struct _PyASCIIObject_State_Hidden -{ - unsigned int : 8; - unsigned int hidden : 24; -} PyASCIIObject_State_Hidden; - // Used to quickly exit on cases where the object is a non interned unicode // string and does not have the fast-taint mark on its internal data structure. // In any other case it will return false so the evaluation continue for (more @@ -46,7 +38,7 @@ is_notinterned_notfasttainted_unicode(const PyObject* objptr) return hash == -1 || e->hidden != GET_HASH_KEY(hash); } -// For non interned unicode strings, set a hidden mark on it's internsal data +// For non interned unicode strings, set a hidden mark on it's internal data // structure that will allow us to quickly check if the string is not tainted // and thus skip further processing without having to search on the tainting map __attribute__((flatten)) void diff --git a/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h b/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h index d00a52b5a3e..6ff008337c4 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h +++ b/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h @@ -10,6 +10,14 @@ using namespace pybind11::literals; namespace py = pybind11; +#define GET_HASH_KEY(hash) (hash & 0xFFFFFF) + +typedef struct _PyASCIIObject_State_Hidden +{ + unsigned int : 8; + unsigned int hidden : 24; +} PyASCIIObject_State_Hidden; + enum class PyTextType { UNICODE = 0, @@ -39,16 +47,13 @@ set_fast_tainted_if_notinterned_unicode(PyObject* objptr); inline bool is_text(const PyObject* pyptr) { - if (!pyptr) - return false; - - return PyUnicode_Check(pyptr) || PyBytes_Check(pyptr) || PyByteArray_Check(pyptr); + return (pyptr != nullptr) and (PyUnicode_Check(pyptr) or PyBytes_Check(pyptr) or PyByteArray_Check(pyptr)); } inline bool is_tainteable(const PyObject* pyptr) { - return is_text(pyptr) || PyReMatch_Check(pyptr); + return pyptr != nullptr and (is_text(pyptr) or PyReMatch_Check(pyptr)); } // Base function for the variadic template @@ -96,7 +101,7 @@ StringToPyObject(const string& str, const PyTextType type) case PyTextType::BYTEARRAY: return py::bytearray(str); default: - return {}; + return py::none(); } } @@ -109,6 +114,10 @@ StringToPyObject(const char* str, const PyTextType type) inline string PyObjectToString(PyObject* obj) { + if (obj == nullptr or !PyUnicode_Check(obj)) { + return ""; + } + const char* str = PyUnicode_AsUTF8(obj); if (str == nullptr) { diff --git a/ddtrace/appsec/_iast/_taint_tracking/clean.sh b/ddtrace/appsec/_iast/_taint_tracking/clean.sh index 9542882c81e..32815fec547 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/clean.sh +++ b/ddtrace/appsec/_iast/_taint_tracking/clean.sh @@ -3,5 +3,9 @@ set -exu #cd -- "$(dirname -- "${BASH_SOURCE[0]}")" || exit rm -rf CMakeFiles/ CMakeCache.txt Makefile cmake_install.cmake __pycache__/ .cmake *.cbp Testing +rm -rf tests/CMakeFiles/ tests/CMakeCache.txt tests/Makefile tests/cmake_install.cmake tests/__pycache__/ tests/.cmake *.cbp rm -rf cmake-build-debug cmake-build-default cmake-build-tests +rm -rf tests/cmake-build-debug tests/cmake-build-default tests/cmake-build-tests yes|rm -f *.so +yes|rm -f tests/*.so +yes|rm -f tests/native_tests diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/CMakeLists.txt b/ddtrace/appsec/_iast/_taint_tracking/tests/CMakeLists.txt new file mode 100644 index 00000000000..4d75a89c4d9 --- /dev/null +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/CMakeLists.txt @@ -0,0 +1,25 @@ +include(FetchContent) +FetchContent_Declare( + googletest + URL https://github.com/google/googletest/archive/refs/tags/release-1.11.0.zip +) +FetchContent_MakeAvailable(googletest) + +enable_testing() + +include_directories(${PYTHON_INCLUDE_DIRS}) +include_directories(${PYBIND11_INCLUDE_DIRS}) + +file(GLOB TEST_SOURCES "*.cpp") +add_executable(native_tests ${TEST_SOURCES} ${SOURCE_FILES} ${HEADER_FILES}) + +if (CMAKE_BUILD_TYPE STREQUAL "Release") + target_link_libraries(native_tests gtest gtest_main ${PYTHON_LIBRARIES} pybind11::module absl::node_hash_map) +else() + target_link_libraries(native_tests gtest gtest_main ${PYTHON_LIBRARIES} pybind11::module) +endif () + + +# Discover tests +include(GoogleTest) +gtest_discover_tests(native_tests) diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp new file mode 100644 index 00000000000..15386780274 --- /dev/null +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp @@ -0,0 +1,667 @@ +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace py = pybind11; + +class PyEnvCheck : public ::testing::Test +{ + protected: + void SetUp() override { py::initialize_interpreter(); } + + void TearDown() override { py::finalize_interpreter(); } +}; + +class PyEnvWithContext : public ::testing::Test +{ + protected: + void SetUp() override + { + initializer = make_unique(); + py::initialize_interpreter(); + initializer->create_context(); + } + + void TearDown() override + { + initializer->reset_context(); + py::finalize_interpreter(); + } +}; + +using HasPyErrCheck = PyEnvCheck; + +TEST_F(HasPyErrCheck, NoErrorReturnsFalse) +{ + EXPECT_FALSE(has_pyerr()); + EXPECT_STREQ(has_pyerr_as_string().c_str(), ""); +} + +TEST_F(HasPyErrCheck, ErrorReturnsTrue) +{ + PyErr_SetString(PyExc_RuntimeError, "Test error"); + EXPECT_TRUE(has_pyerr()); + EXPECT_STREQ(has_pyerr_as_string().c_str(), "Test error"); + PyErr_Clear(); +} + +TEST_F(HasPyErrCheck, ClearError) +{ + PyErr_SetString(PyExc_RuntimeError, "Test error"); + EXPECT_TRUE(has_pyerr()); + EXPECT_STREQ(has_pyerr_as_string().c_str(), "Test error"); + + // Clear the error + PyErr_Clear(); + EXPECT_FALSE(has_pyerr()); + EXPECT_STREQ(has_pyerr_as_string().c_str(), ""); +} + +using GetTagCheck = ::testing::Test; + +TEST_F(GetTagCheck, HandlesEmptyString) +{ + std::string input = ""; + std::string expected_output = EVIDENCE_MARKS::BLANK; + EXPECT_STREQ(get_tag(input).c_str(), expected_output.c_str()); +} + +TEST_F(GetTagCheck, HandlesNonEmptyString) +{ + std::string input = "example"; + std::string expected_output = std::string(EVIDENCE_MARKS::LESS) + "example" + std::string(EVIDENCE_MARKS::GREATER); + EXPECT_STREQ(get_tag(input).c_str(), expected_output.c_str()); +} + +TEST_F(GetTagCheck, HandlesSpecialCharacters) +{ + std::string input = "special!@#"; + std::string expected_output = + std::string(EVIDENCE_MARKS::LESS) + "special!@#" + std::string(EVIDENCE_MARKS::GREATER); + EXPECT_STREQ(get_tag(input).c_str(), expected_output.c_str()); +} +using GetDefaultContentCheck = ::testing::Test; + +TEST_F(GetDefaultContentCheck, HandlesEmptySourceName) +{ + TaintRangePtr taint_range = std::make_shared(); + taint_range->source.name = ""; + std::string expected_output = ""; + EXPECT_STREQ(get_default_content(taint_range).c_str(), expected_output.c_str()); +} + +TEST_F(GetDefaultContentCheck, HandlesNonEmptySourceName) +{ + TaintRangePtr taint_range = std::make_shared(); + taint_range->source.name = "example"; + std::string expected_output = "example"; + EXPECT_STREQ(get_default_content(taint_range).c_str(), expected_output.c_str()); +} + +TEST_F(GetDefaultContentCheck, HandlesSpecialCharactersInSourceName) +{ + TaintRangePtr taint_range = std::make_shared(); + taint_range->source.name = "special!@#"; + std::string expected_output = "special!@#"; + EXPECT_STREQ(get_default_content(taint_range).c_str(), expected_output.c_str()); +} + +using MapperReplaceCheck = PyEnvCheck; + +TEST_F(MapperReplaceCheck, HandlesNullTaintRange) +{ + optional new_ranges = py::dict(); + EXPECT_STREQ(mapper_replace(nullptr, new_ranges).c_str(), ""); +} + +TEST_F(MapperReplaceCheck, HandlesNullNewRanges) +{ + TaintRangePtr taint_range = std::make_shared(); + EXPECT_STREQ(mapper_replace(taint_range, nullopt).c_str(), ""); +} + +TEST_F(MapperReplaceCheck, HandlesNonExistingRange) +{ + TaintRangePtr taint_range = std::make_shared(); + optional new_ranges = py::dict(); + EXPECT_STREQ(mapper_replace(taint_range, new_ranges).c_str(), ""); +} + +// FIXME: not working, check with Alberto +TEST_F(MapperReplaceCheck, DISABLED_HandlesExistingRange) +{ + TaintRangePtr taint_range = std::make_shared(); + taint_range->start = 0; + taint_range->length = 5; + taint_range->source.name = "example"; + + TaintRangePtr new_range = std::make_shared(); + new_range->start = 0; + new_range->length = 5; + new_range->source.name = "new_example"; + + py::dict new_ranges; + new_ranges[py::cast(taint_range)] = py::cast(new_range); + + EXPECT_STREQ(mapper_replace(taint_range, new_ranges).c_str(), std::to_string(new_range->get_hash()).c_str()); +} + +using GetNumTest = PyEnvCheck; + +TEST_F(GetNumTest, ValidNumber) +{ + std::string valid_str = "12345"; + unsigned long int result = getNum(valid_str); + EXPECT_EQ(result, 12345); +} + +TEST_F(GetNumTest, EmptyString) +{ + std::string empty_str = ""; + unsigned long int result = getNum(empty_str); + EXPECT_EQ(result, static_cast(-1)); +} + +TEST_F(GetNumTest, InvalidString) +{ + std::string invalid_str = "abc"; + unsigned long int result = getNum(invalid_str); + EXPECT_EQ(result, static_cast(-1)); +} + +TEST_F(GetNumTest, OutOfRangeNumber) +{ + std::string out_of_range_str = "999999999999999999999999"; + unsigned long int result = getNum(out_of_range_str); + EXPECT_EQ(result, static_cast(-1)); // Should return -1 due to exception +} + +TEST_F(GetNumTest, MaxUnsignedLong) +{ + std::string max_ulong_str = std::to_string(ULONG_MAX); + unsigned long int result = getNum(max_ulong_str); + EXPECT_EQ(result, ULONG_MAX); +} + +using AsFormattedEvidenceCheck = PyEnvWithContext; +using AsFormattedEvidenceCheckNoContext = PyEnvCheck; + +TEST_F(AsFormattedEvidenceCheckNoContext, NoTaintMapSameString) +{ + const py::str text("This is a test string."); + Source source("source1", "sample_value", OriginType::BODY); + TaintRangeRefs taint_ranges = { std::make_shared(5, 4, source) }; + const py::str result = as_formatted_evidence(text, taint_ranges); + EXPECT_STREQ(AnyTextObjectToString(result).c_str(), AnyTextObjectToString(text).c_str()); +} + +TEST_F(AsFormattedEvidenceCheck, NoTaintRanges) +{ + std::string text = "This is a test string."; + TaintRangeRefs taint_ranges; // Empty ranges + std::string result = as_formatted_evidence(text, taint_ranges, std::nullopt); + EXPECT_STREQ(result.c_str(), text.c_str()); +} + +TEST_F(AsFormattedEvidenceCheck, SingleTaintRangeWithNoMapper) +{ + const std::string text = "This is a test string."; + Source source("source1", "sample_value", OriginType::BODY); + TaintRangeRefs taint_ranges = { std::make_shared(5, 4, source) }; + const std::string expected_result = "This :+-is a-+: test string."; // Expected tagged output + const std::string result = as_formatted_evidence(text, taint_ranges, nullopt, nullopt); + EXPECT_STREQ(result.c_str(), expected_result.c_str()); +} + +TEST_F(AsFormattedEvidenceCheck, MultipleTaintRangesWithNoMapper) +{ + const std::string text = "This is a test string."; + Source source1("source1", "sample_value1", OriginType::BODY); + Source source2("source2", "sample_value2", OriginType::BODY); + TaintRangeRefs taint_ranges = { std::make_shared(5, 2, source1), + std::make_shared(10, 4, source2) }; + const std::string expected_result = "This :+-is-+: a :+-test-+: string."; + const std::string result = as_formatted_evidence(text, taint_ranges, nullopt, nullopt); + EXPECT_STREQ(result.c_str(), expected_result.c_str()); +} + +TEST_F(AsFormattedEvidenceCheck, DefaultTagMappingModeIsMapper) +{ + const std::string text = "This is a test string."; + Source source("source1", "sample_value", OriginType::BODY); + TaintRangeRefs taint_ranges = { std::make_shared(5, 2, source) }; + + const std::string expected_result = "This :+-<3485454368>is<3485454368>-+: a test string."; + const std::string result = as_formatted_evidence(text, taint_ranges); + EXPECT_STREQ(result.c_str(), expected_result.c_str()); +} + +TEST_F(AsFormattedEvidenceCheck, MultipleRangesWithMapper) +{ + const std::string text = "This is a test string."; + Source source1("source1", "sample_value", OriginType::BODY); + Source source2("source2", "sample_value", OriginType::PARAMETER); + TaintRangeRefs taint_ranges = { + std::make_shared(5, 2, source1), + std::make_shared(10, 4, source2), + }; + + const std::string expected_result = + "This :+-<3485454368>is<3485454368>-+: a :+-<891889858>test<891889858>-+: string."; + const std::string result = as_formatted_evidence(text, taint_ranges); + EXPECT_STREQ(result.c_str(), expected_result.c_str()); +} + +// FIXME: same problem as mapper_replace test above +TEST_F(AsFormattedEvidenceCheck, DISABLED_SingleTaintRangeWithMapperReplace) +{ + const std::string text = "This is a test string."; + Source source("source1", "sample_value", OriginType::BODY); + TaintRangeRefs taint_ranges = { std::make_shared(5, 2, source) }; + + py::dict new_ranges; + TaintRange new_range(5, 2, Source("new_source", "sample_value", OriginType::BODY)); + new_ranges[py::cast(taint_ranges[0])] = new_range; + + const std::string expected_result = "This :+-is-+: a test string."; + const std::string result = as_formatted_evidence(text, taint_ranges, TagMappingMode::Mapper_Replace, new_ranges); + EXPECT_STREQ(result.c_str(), expected_result.c_str()); +} + +TEST_F(AsFormattedEvidenceCheck, EmptyTextWithTaintRanges) +{ + const std::string text; + Source source("source1", "sample_value", OriginType::BODY); + TaintRangeRefs taint_ranges = { std::make_shared(0, 1, source) }; + const std::string expected_result; + const std::string result = as_formatted_evidence(text, taint_ranges, nullopt, nullopt); + EXPECT_STREQ(result.c_str(), expected_result.c_str()); +} + +using AllAsFormattedEvidenceCheck = PyEnvWithContext; +using AllAsFormattedEvidenceCheckNoContext = PyEnvCheck; + +TEST_F(AllAsFormattedEvidenceCheckNoContext, NoTaintMapSameString) +{ + const py::str text("This is a test string."); + const py::str result = all_as_formatted_evidence(text, TagMappingMode::Mapper); + EXPECT_STREQ(AnyTextObjectToString(result).c_str(), AnyTextObjectToString(text).c_str()); +} + +TEST_F(AllAsFormattedEvidenceCheck, NoRangesSameString) +{ + const py::str text("This is a test string."); + const py::str result = all_as_formatted_evidence(text, TagMappingMode::Mapper); + EXPECT_STREQ(AnyTextObjectToString(result).c_str(), AnyTextObjectToString(text).c_str()); +} + +TEST_F(AllAsFormattedEvidenceCheck, SingleTaintRangeWithNormalMapper) +{ + py::str text("This is a test string."); + Source source("source1", "sample_value", OriginType::BODY); + TaintRangeRefs taint_ranges = { std::make_shared(5, 2, source) }; + api_set_ranges(text, taint_ranges); + + const py::str expected_result("This :+-is-+: a test string."); + const py::str result = all_as_formatted_evidence(text, TagMappingMode::Normal); + + EXPECT_STREQ(AnyTextObjectToString(result).c_str(), AnyTextObjectToString(expected_result).c_str()); +} + +TEST_F(AllAsFormattedEvidenceCheck, SingleTaintRangeWithMapper) +{ + py::str text("This is a test string."); + Source source("source1", "sample_value", OriginType::BODY); + TaintRangeRefs taint_ranges = { std::make_shared(5, 2, source) }; + api_set_ranges(text, taint_ranges); + + const py::str expected_result("This :+-<3485454368>is<3485454368>-+: a test string."); + const py::str result = all_as_formatted_evidence(text, TagMappingMode::Mapper); + + EXPECT_STREQ(AnyTextObjectToString(result).c_str(), AnyTextObjectToString(expected_result).c_str()); +} + +// See above +TEST_F(AllAsFormattedEvidenceCheck, DISABLED_SingleTaintRangeWithMapperReplace) +{ + py::str text("This is a test string."); + Source source("source1", "sample_value", OriginType::BODY); + TaintRangeRefs taint_ranges = { std::make_shared(5, 2, source) }; + api_set_ranges(text, taint_ranges); + + py::dict new_ranges; + TaintRange new_range(5, 2, Source("new_source", "sample_value", OriginType::BODY)); + new_ranges[py::cast(taint_ranges[0])] = new_range; + + const py::str expected_result("This :+-is-+: a test string."); + const py::str result = all_as_formatted_evidence(text, TagMappingMode::Mapper_Replace); + + EXPECT_STREQ(AnyTextObjectToString(result).c_str(), AnyTextObjectToString(expected_result).c_str()); +} + +TEST_F(AllAsFormattedEvidenceCheck, EmptyText) +{ + const py::str text(""); + const py::str result = all_as_formatted_evidence(text, TagMappingMode::Mapper); + EXPECT_STREQ(AnyTextObjectToString(result).c_str(), AnyTextObjectToString(text).c_str()); +} + +using ParseParamsCheck = PyEnvCheck; + +TEST_F(ParseParamsCheck, PositionalArgumentPresent) +{ + py::args args = py::make_tuple(42); + py::kwargs kwargs; + py::object default_value = py::int_(0); + + py::object result = parse_params(0, "key", default_value, args, kwargs); + EXPECT_EQ(result.cast(), 42); +} + +TEST_F(ParseParamsCheck, KeywordArgumentPresent) +{ + py::args args; + py::kwargs kwargs; + kwargs["key"] = py::int_(42); + py::object default_value = py::int_(0); + + py::object result = parse_params(0, "key", default_value, args, kwargs); + EXPECT_EQ(result.cast(), 42); +} + +TEST_F(ParseParamsCheck, NoArgumentUsesDefault) +{ + py::args args; + py::kwargs kwargs; + py::object default_value = py::int_(42); + + py::object result = parse_params(0, "key", default_value, args, kwargs); + EXPECT_EQ(result.cast(), 42); +} + +TEST_F(ParseParamsCheck, PositionalOverridesKeyword) +{ + py::args args = py::make_tuple(100); + py::kwargs kwargs; + kwargs["key"] = py::int_(42); + py::object default_value = py::int_(0); + + py::object result = parse_params(0, "key", default_value, args, kwargs); + EXPECT_EQ(result.cast(), 100); +} + +TEST_F(ParseParamsCheck, HandlesMissingKeyword) +{ + py::args args; + py::kwargs kwargs; + py::object default_value = py::str("default_value"); + + py::object result = parse_params(0, "missing_key", default_value, args, kwargs); + EXPECT_STREQ(result.cast().c_str(), "default_value"); +} + +TEST(SplitTaints, EmptyString) +{ + std::string input = ""; + std::vector expected_output = { "" }; + std::vector result = split_taints(input); + EXPECT_EQ(result, expected_output); +} + +TEST(SplitTaints, NoTaintsInString) +{ + std::string input = "This is a regular string."; + std::vector expected_output = { "This is a regular string." }; + std::vector result = split_taints(input); + EXPECT_EQ(result, expected_output); +} + +TEST(SplitTaints, SingleTaintInString) +{ + std::string input = "This is a :+-test-+: string."; + std::vector expected_output = { "This is a ", ":+-", "test", "-+:", " string." }; + std::vector result = split_taints(input); + EXPECT_EQ(result, expected_output); +} + +TEST(SplitTaints, MultipleTaintsInString) +{ + std::string input = "This :+-is-+: a :+-test-+: string."; + std::vector expected_output = { "This ", ":+-", "is", "-+:", " a ", + ":+-", "test", "-+:", " string." }; + std::vector result = split_taints(input); + EXPECT_EQ(result, expected_output); +} + +TEST(SplitTaints, TaintsAtStartAndEnd) +{ + std::string input = ":+-Start-+: and :+-End-+:"; + std::vector expected_output = { "", ":+-", "Start", "-+:", + " and ", ":+-", "End", "-+:" }; + std::vector result = split_taints(input); + EXPECT_EQ(result, expected_output); +} + +TEST(SplitTaints, ConsecutiveTaints) +{ + std::string input = "Text :+-taint1-+: :+-taint2-+: after."; + std::vector expected_output = { "Text ", ":+-", "taint1", "-+:", " ", + ":+-", "taint2", "-+:", " after." }; + std::vector result = split_taints(input); + EXPECT_EQ(result, expected_output); +} + +using SetRangesOnSplittedCheck = PyEnvWithContext; + +TEST_F(SetRangesOnSplittedCheck, EmptySourceAndSplit) +{ + py::str source_str = ""; + py::list split_result; + TaintRangeRefs source_ranges; + auto tx_map = Initializer::get_tainting_map(); + bool result = set_ranges_on_splitted(source_str, source_ranges, split_result, tx_map, false); + EXPECT_FALSE(result); + + for (const auto& item : split_result) { + EXPECT_STREQ(AnyTextObjectToString(item.cast()).c_str(), ""); + auto ranges = get_ranges(item.ptr(), tx_map); + EXPECT_TRUE(ranges.first.empty()); + } +} + +TEST_F(SetRangesOnSplittedCheck, SingleSplitWithoutSeparator) +{ + py::str source_str = "This is a test string."; + py::list split_result; + split_result.append(py::str("This")); + split_result.append(py::str("is a test string.")); + + Source source("source1", "sample_value", OriginType::BODY); + TaintRangeRefs source_ranges = { std::make_shared(0, 4, source) }; + api_set_ranges(source_str, source_ranges); + auto tx_map = Initializer::get_tainting_map(); + bool result = set_ranges_on_splitted(source_str, source_ranges, split_result, tx_map, false); + EXPECT_TRUE(result); + + auto first = split_result[0]; + auto first_ranges = get_ranges(first.ptr(), tx_map); + EXPECT_EQ(first_ranges.first.size(), 1); + EXPECT_EQ(first_ranges.first[0]->start, 0); + EXPECT_EQ(first_ranges.first[0]->length, 4); + + auto last = split_result[1]; + auto last_ranges = get_ranges(last.ptr(), tx_map); + EXPECT_TRUE(last_ranges.first.empty()); +} + +TEST_F(SetRangesOnSplittedCheck, MultipleSplitsNoSeparator) +{ + py::str source_str = "This is a test string."; + py::list split_result; + split_result.append(py::str("This")); + split_result.append(py::str("is")); + split_result.append(py::str("a")); + split_result.append(py::str("test")); + split_result.append(py::str("string.")); + + Source source1("source1", "sample_value1", OriginType::BODY); + Source source2("source2", "sample_value2", OriginType::BODY); + TaintRangeRefs source_ranges = { + std::make_shared(0, 4, source1), // Taint "This" + std::make_shared(10, 4, source2) // Taint "test" + }; + api_set_ranges(source_str, source_ranges); + auto tx_map = Initializer::get_tainting_map(); + + bool result = set_ranges_on_splitted(source_str, source_ranges, split_result, tx_map, false); + EXPECT_TRUE(result); + + // Check first split part "This" + auto first = split_result[0]; + auto first_ranges = get_ranges(first.ptr(), tx_map); + EXPECT_EQ(first_ranges.first.size(), 1); + EXPECT_EQ(first_ranges.first[0]->start, 0); + EXPECT_EQ(first_ranges.first[0]->length, 4); + + // Check middle split part "test" + auto test_part = split_result[3]; + auto test_ranges = get_ranges(test_part.ptr(), tx_map); + EXPECT_EQ(test_ranges.first.size(), 1); + EXPECT_EQ(test_ranges.first[0]->start, 0); // Position within "test" + EXPECT_EQ(test_ranges.first[0]->length, 4); + + // Check that other parts have no ranges + for (int i : { 1, 2, 4 }) { + auto part = split_result[i]; + auto part_ranges = get_ranges(part.ptr(), tx_map); + EXPECT_TRUE(part_ranges.first.empty()); + } +} + +TEST_F(SetRangesOnSplittedCheck, SplitWithSeparatorIncluded) +{ + py::str source_str = "This|is|a|test|string."; + py::list split_result; + split_result.append(py::str("This")); + split_result.append(py::str("|is")); + split_result.append(py::str("|a")); + split_result.append(py::str("|test")); + split_result.append(py::str("|string.")); + + Source source1("source1", "sample_value1", OriginType::BODY); + Source source2("source2", "sample_value2", OriginType::BODY); + TaintRangeRefs source_ranges = { + std::make_shared(0, 4, source1), // Taint "This" + std::make_shared(10, 4, source2) // Taint "test" + }; + api_set_ranges(source_str, source_ranges); + auto tx_map = Initializer::get_tainting_map(); + + bool result = set_ranges_on_splitted(source_str, source_ranges, split_result, tx_map, true); + EXPECT_TRUE(result); + + // Check first split part "This" + auto first = split_result[0]; + auto first_ranges = get_ranges(first.ptr(), tx_map); + EXPECT_EQ(first_ranges.first.size(), 1); + EXPECT_EQ(first_ranges.first[0]->start, 0); + EXPECT_EQ(first_ranges.first[0]->length, 4); + + // Check middle split part "test" + auto test_part = split_result[3]; + auto test_ranges = get_ranges(test_part.ptr(), tx_map); + EXPECT_EQ(test_ranges.first.size(), 1); + EXPECT_EQ(test_ranges.first[0]->start, 1); // Position within "test" + EXPECT_EQ(test_ranges.first[0]->length, 4); + + // Check that other parts have no ranges + for (int i : { 1, 2, 4 }) { + auto part = split_result[i]; + auto part_ranges = get_ranges(part.ptr(), tx_map); + EXPECT_TRUE(part_ranges.first.empty()); + } +} + +TEST_F(SetRangesOnSplittedCheck, EmptyRanges) +{ + py::str source_str = "This is a test string."; + py::list split_result; + split_result.append(py::str("This")); + split_result.append(py::str("is a test string.")); + + TaintRangeRefs source_ranges; // Empty ranges + auto tx_map = Initializer::get_tainting_map(); + + bool result = set_ranges_on_splitted(source_str, source_ranges, split_result, tx_map, false); + EXPECT_FALSE(result); + + // Check that no ranges are applied to the split result + for (const auto& item : split_result) { + auto item_ranges = get_ranges(item.ptr(), tx_map); + EXPECT_TRUE(item_ranges.first.empty()); + } +} + +using ProcessFlagAddedArgsTest = PyEnvCheck; + +TEST_F(ProcessFlagAddedArgsTest, NoAddedArgsOriginalNone) +{ + PyObject* orig_function = Py_None; + int flag_added_args = 0; + py::tuple args = py::make_tuple("arg1", "arg2"); + py::dict kwargs; + + PyObject* result = process_flag_added_args(orig_function, flag_added_args, args.ptr(), kwargs.ptr()); + + // Should return args as no slicing is required + EXPECT_EQ(result, args.ptr()); +} + +// Test with added args, original function is None +TEST_F(ProcessFlagAddedArgsTest, AddedArgsOriginalNone) +{ + PyObject* orig_function = Py_None; + int flag_added_args = 1; + py::tuple args = py::make_tuple("arg1", "arg2", "added_arg"); + py::dict kwargs; + + PyObject* result = process_flag_added_args(orig_function, flag_added_args, args.ptr(), kwargs.ptr()); + + // Should return the full argument list since no slicing is needed + EXPECT_EQ(result, args.ptr()); +} + +// Test with added args, original function is custom +TEST_F(ProcessFlagAddedArgsTest, AddedArgsOriginalCustomFunction) +{ + PyObject* orig_function = Py_None; + py::object custom_function = py::cpp_function([](py::str arg1, py::str arg2) { return arg1; }); + orig_function = custom_function.ptr(); + + int flag_added_args = 1; + py::tuple args = py::make_tuple("arg1", "arg2", "added_arg"); + py::dict kwargs; + + PyObject* result = process_flag_added_args(orig_function, flag_added_args, args.ptr(), kwargs.ptr()); + EXPECT_STREQ(AnyTextObjectToString(py::reinterpret_borrow(result)).c_str(), "arg2"); +} + +// Test with no added args, original function is custom +TEST_F(ProcessFlagAddedArgsTest, NoAddedArgsOriginalCustomFunction) +{ + py::object custom_function = py::cpp_function([](py::str arg1, py::str arg2) { return arg1; }); + PyObject* orig_function = custom_function.ptr(); + + int flag_added_args = 0; + py::tuple args = py::make_tuple("arg1", "arg2"); + py::dict kwargs; + + PyObject* result = process_flag_added_args(orig_function, flag_added_args, args.ptr(), kwargs.ptr()); + EXPECT_STREQ(AnyTextObjectToString(py::reinterpret_borrow(result)).c_str(), "arg1"); +} diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_stringutils.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_stringutils.cpp new file mode 100644 index 00000000000..92a30f2e0f2 --- /dev/null +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_stringutils.cpp @@ -0,0 +1,539 @@ +#include +#include +#include +#include + +#include + +namespace py = pybind11; + +class PyEnvCheck : public ::testing::Test +{ + protected: + void SetUp() override { py::initialize_interpreter(); } + + void TearDown() override { py::finalize_interpreter(); } +}; + +using GetUniqueId = PyEnvCheck; + +TEST_F(GetUniqueId, TestGetUniqueId) +{ + PyObject* py_str = PyUnicode_FromString("test_string"); + auto expected_value = reinterpret_cast(py_str); + EXPECT_EQ(get_unique_id(py_str), expected_value); + + PyObject* nullobject = nullptr; + expected_value = reinterpret_cast(nullobject); + EXPECT_EQ(get_unique_id(nullobject), expected_value); + + Py_DECREF(py_str); +} + +using PyReMatchCheck = PyEnvCheck; + +TEST_F(PyReMatchCheck, TestPyReMatchValidMatchObject) +{ + py::object re_module = py::module_::import("re"); + py::object match_obj = re_module.attr("match")("a", "a"); + + ASSERT_TRUE(PyReMatch_Check(match_obj.ptr())); +} + +TEST_F(PyReMatchCheck, TEstPyReMatchInvalidNonMatchObject) +{ + py::object non_match_obj = py::int_(42); // Not a `re.Match` object + + ASSERT_FALSE(PyReMatch_Check(non_match_obj.ptr())); +} + +TEST_F(PyReMatchCheck, TEstPyReMatchNullObject) +{ + PyObject* null_obj = Py_None; + + ASSERT_FALSE(PyReMatch_Check(null_obj)); +} + +using IsFastTaintedCheck = PyEnvCheck; + +TEST_F(IsFastTaintedCheck, FastTaintedNullptrReturnsTrue) +{ + EXPECT_TRUE(is_notinterned_notfasttainted_unicode(nullptr)); +} + +TEST_F(IsFastTaintedCheck, FastTaintedNonUnicodeReturnsFalse) +{ + PyObject* non_unicode = PyLong_FromLong(42); + EXPECT_FALSE(is_notinterned_notfasttainted_unicode(non_unicode)); + Py_DECREF(non_unicode); +} + +TEST_F(IsFastTaintedCheck, FastTaintedInternedUnicodeReturnsTrue) +{ + PyObject* interned_unicode = PyUnicode_InternFromString("interned"); + EXPECT_TRUE(is_notinterned_notfasttainted_unicode(interned_unicode)); + Py_DECREF(interned_unicode); +} + +TEST_F(IsFastTaintedCheck, NonInternedUnicodeWithHashMinusOneReturnsTrue) +{ + PyObject* non_interned_unicode = PyUnicode_FromString("non_interned"); + reinterpret_cast(non_interned_unicode)->hash = -1; + EXPECT_TRUE(is_notinterned_notfasttainted_unicode(non_interned_unicode)); + Py_DECREF(non_interned_unicode); +} + +TEST_F(IsFastTaintedCheck, NonInternedUnicodeWithHiddenNotMatchingHashReturnsTrue) +{ + PyObject* non_interned_unicode = PyUnicode_FromString("non_interned"); + reinterpret_cast(non_interned_unicode)->hash = 12345; + reinterpret_cast<_PyASCIIObject_State_Hidden*>(&reinterpret_cast(non_interned_unicode)->state) + ->hidden = 54321; + EXPECT_TRUE(is_notinterned_notfasttainted_unicode(non_interned_unicode)); + Py_DECREF(non_interned_unicode); +} + +TEST_F(IsFastTaintedCheck, NonInternedUnicodeWithHiddenMatchingHashReturnsFalse) +{ + PyObject* non_interned_unicode = PyUnicode_FromString("non_interned"); + reinterpret_cast(non_interned_unicode)->hash = 12345; + reinterpret_cast<_PyASCIIObject_State_Hidden*>(&reinterpret_cast(non_interned_unicode)->state) + ->hidden = GET_HASH_KEY(12345); + EXPECT_FALSE(is_notinterned_notfasttainted_unicode(non_interned_unicode)); + Py_DECREF(non_interned_unicode); +} + +using SetFastTaintedCheck = PyEnvCheck; + +TEST_F(SetFastTaintedCheck, NullptrDoesNothing) +{ + set_fast_tainted_if_notinterned_unicode(nullptr); + // No assertion needed, just ensure no crash +} + +TEST_F(SetFastTaintedCheck, NonUnicodeDoesNothing) +{ + PyObject* non_unicode = PyLong_FromLong(42); + set_fast_tainted_if_notinterned_unicode(non_unicode); + // No assertion needed, just ensure no crash + Py_DECREF(non_unicode); +} + +TEST_F(SetFastTaintedCheck, InternedUnicodeDoesNothing) +{ + PyObject* interned_unicode = PyUnicode_InternFromString("interned"); + set_fast_tainted_if_notinterned_unicode(interned_unicode); + // No assertion needed, just ensure no crash + Py_DECREF(interned_unicode); +} + +TEST_F(SetFastTaintedCheck, NonInternedUnicodeSetsHidden) +{ + PyObject* non_interned_unicode = PyUnicode_FromString("non_interned"); + reinterpret_cast(non_interned_unicode)->hash = 12345; + set_fast_tainted_if_notinterned_unicode(non_interned_unicode); + const _PyASCIIObject_State_Hidden* e = + (_PyASCIIObject_State_Hidden*)&(((PyASCIIObject*)non_interned_unicode)->state); + EXPECT_EQ(e->hidden, GET_HASH_KEY(12345)); + Py_DECREF(non_interned_unicode); +} + +TEST_F(SetFastTaintedCheck, NonInternedUnicodeWithHashMinusOneSetsHidden) +{ + PyObject* non_interned_unicode = PyUnicode_FromString("non_interned"); + reinterpret_cast(non_interned_unicode)->hash = -1; + set_fast_tainted_if_notinterned_unicode(non_interned_unicode); + Py_hash_t hash = PyObject_Hash(non_interned_unicode); + const _PyASCIIObject_State_Hidden* e = + (_PyASCIIObject_State_Hidden*)&(((PyASCIIObject*)non_interned_unicode)->state); + EXPECT_EQ(e->hidden, GET_HASH_KEY(hash)); + Py_DECREF(non_interned_unicode); +} + +using IsTextCheck = PyEnvCheck; + +TEST_F(IsTextCheck, NullptrReturnsFalse) +{ + EXPECT_FALSE(is_text(nullptr)); +} + +TEST_F(IsTextCheck, UnicodeReturnsTrue) +{ + PyObject* unicode_obj = PyUnicode_FromString("test"); + EXPECT_TRUE(is_text(unicode_obj)); + Py_DECREF(unicode_obj); +} + +TEST_F(IsTextCheck, BytesReturnsTrue) +{ + PyObject* bytes_obj = PyBytes_FromString("test"); + EXPECT_TRUE(is_text(bytes_obj)); + Py_DECREF(bytes_obj); +} + +TEST_F(IsTextCheck, ByteArrayReturnsTrue) +{ + PyObject* bytes_obj = PyBytes_FromString("test"); + PyObject* bytearray_obj = PyByteArray_FromObject(bytes_obj); + EXPECT_TRUE(is_text(bytearray_obj)); + Py_DECREF(bytearray_obj); + Py_DECREF(bytes_obj); +} + +TEST_F(IsTextCheck, NonTextReturnsFalse) +{ + PyObject* non_text_obj = PyLong_FromLong(42); + EXPECT_FALSE(is_text(non_text_obj)); + Py_DECREF(non_text_obj); +} + +using IsTainteableCheck = PyEnvCheck; + +TEST_F(IsTainteableCheck, NullptrReturnsFalse) +{ + EXPECT_FALSE(is_tainteable(nullptr)); +} + +TEST_F(IsTainteableCheck, UnicodeReturnsTrue) +{ + PyObject* unicode_obj = PyUnicode_FromString("test"); + EXPECT_TRUE(is_tainteable(unicode_obj)); + Py_DECREF(unicode_obj); +} + +TEST_F(IsTainteableCheck, BytesReturnsTrue) +{ + PyObject* bytes_obj = PyBytes_FromString("test"); + EXPECT_TRUE(is_tainteable(bytes_obj)); + Py_DECREF(bytes_obj); +} + +TEST_F(IsTainteableCheck, ByteArrayReturnsTrue) +{ + PyObject* bytes_obj = PyBytes_FromString("test"); + PyObject* bytearray_obj = PyByteArray_FromObject(bytes_obj); + EXPECT_TRUE(is_tainteable(bytearray_obj)); + Py_DECREF(bytearray_obj); + Py_DECREF(bytes_obj); +} + +TEST_F(IsTainteableCheck, NonTextReturnsFalse) +{ + PyObject* non_text_obj = PyLong_FromLong(42); + EXPECT_FALSE(is_tainteable(non_text_obj)); + Py_DECREF(non_text_obj); +} + +TEST_F(IsTainteableCheck, ReMatchReturnsTrue) +{ + py::object re = py::module_::import("re"); + py::object match = re.attr("match")("a", "a"); + EXPECT_TRUE(is_tainteable(match.ptr())); +} + +using ArgsAreTextAndSameTypeCheck = PyEnvCheck; + +TEST_F(ArgsAreTextAndSameTypeCheck, NullptrReturnsFalse) +{ + EXPECT_FALSE(args_are_text_and_same_type(nullptr, nullptr)); +} + +TEST_F(ArgsAreTextAndSameTypeCheck, MixedTypesReturnFalse) +{ + PyObject* unicode_obj = PyUnicode_FromString("test"); + PyObject* bytes_obj = PyBytes_FromString("test"); + EXPECT_FALSE(args_are_text_and_same_type(unicode_obj, bytes_obj)); + Py_DECREF(unicode_obj); + Py_DECREF(bytes_obj); +} + +TEST_F(ArgsAreTextAndSameTypeCheck, AllUnicodeReturnsTrue) +{ + PyObject* unicode_obj1 = PyUnicode_FromString("test1"); + PyObject* unicode_obj2 = PyUnicode_FromString("test2"); + EXPECT_TRUE(args_are_text_and_same_type(unicode_obj1, unicode_obj2)); + Py_DECREF(unicode_obj1); + Py_DECREF(unicode_obj2); +} + +TEST_F(ArgsAreTextAndSameTypeCheck, AllBytesReturnsTrue) +{ + PyObject* bytes_obj1 = PyBytes_FromString("test1"); + PyObject* bytes_obj2 = PyBytes_FromString("test2"); + EXPECT_TRUE(args_are_text_and_same_type(bytes_obj1, bytes_obj2)); + Py_DECREF(bytes_obj1); + Py_DECREF(bytes_obj2); +} + +TEST_F(ArgsAreTextAndSameTypeCheck, AllByteArrayReturnsTrue) +{ + PyObject* bytes_obj1 = PyBytes_FromString("test1"); + PyObject* bytearray_obj1 = PyByteArray_FromObject(bytes_obj1); + PyObject* bytes_obj2 = PyBytes_FromString("test2"); + PyObject* bytearray_obj2 = PyByteArray_FromObject(bytes_obj2); + EXPECT_TRUE(args_are_text_and_same_type(bytearray_obj1, bytearray_obj2)); + Py_DECREF(bytearray_obj1); + Py_DECREF(bytearray_obj2); + Py_DECREF(bytes_obj1); + Py_DECREF(bytes_obj2); +} + +TEST_F(ArgsAreTextAndSameTypeCheck, MixedTextTypesReturnFalse) +{ + PyObject* unicode_obj = PyUnicode_FromString("test"); + PyObject* bytes_obj = PyBytes_FromString("test"); + PyObject* bytearray_obj = PyByteArray_FromObject(bytes_obj); + EXPECT_FALSE(args_are_text_and_same_type(unicode_obj, bytes_obj, bytearray_obj)); + Py_DECREF(unicode_obj); + Py_DECREF(bytes_obj); + Py_DECREF(bytearray_obj); +} + +using PyObjectToStringCheck = PyEnvCheck; + +TEST_F(PyObjectToStringCheck, NullptrReturnsEmptyString) +{ + EXPECT_STREQ(PyObjectToString(nullptr).c_str(), ""); +} + +TEST_F(PyObjectToStringCheck, UnicodeReturnsCorrectString) +{ + PyObject* unicode_obj = PyUnicode_FromString("test"); + EXPECT_STREQ(PyObjectToString(unicode_obj).c_str(), "test"); + Py_DECREF(unicode_obj); +} + +TEST_F(PyObjectToStringCheck, NonUnicodeReturnsEmptyString) +{ + PyObject* non_unicode_obj = PyLong_FromLong(42); + EXPECT_STREQ(PyObjectToString(non_unicode_obj).c_str(), ""); + Py_DECREF(non_unicode_obj); +} + +using StringToPyObjectCheck = PyEnvCheck; + +TEST_F(StringToPyObjectCheck, ConvertsToUnicode) +{ + std::string test_str = "test"; + py::object py_obj = StringToPyObject(test_str, PyTextType::UNICODE); + EXPECT_TRUE(PyUnicode_Check(py_obj.ptr())); + EXPECT_STREQ(PyUnicode_AsUTF8(py_obj.ptr()), test_str.c_str()); +} + +TEST_F(StringToPyObjectCheck, ConvertsToBytes) +{ + std::string test_str = "test"; + py::object py_obj = StringToPyObject(test_str, PyTextType::BYTES); + EXPECT_TRUE(PyBytes_Check(py_obj.ptr())); + EXPECT_STREQ(PyBytes_AsString(py_obj.ptr()), test_str.c_str()); +} + +TEST_F(StringToPyObjectCheck, ConvertsToByteArray) +{ + std::string test_str = "test"; + py::object py_obj = StringToPyObject(test_str, PyTextType::BYTEARRAY); + EXPECT_TRUE(PyByteArray_Check(py_obj.ptr())); + EXPECT_STREQ(PyByteArray_AsString(py_obj.ptr()), test_str.c_str()); +} + +TEST_F(StringToPyObjectCheck, InvalidTypeReturnsNone) +{ + std::string test_str = "test"; + py::object py_obj = StringToPyObject(test_str, PyTextType::OTHER); + EXPECT_TRUE(py_obj.is_none()); +} + +using AnyTextObjectToStringCheck = PyEnvCheck; + +TEST_F(AnyTextObjectToStringCheck, UnicodeReturnsCorrectString) +{ + auto unicode_obj = py::str("test"); + EXPECT_STREQ(AnyTextObjectToString(unicode_obj).c_str(), "test"); +} + +TEST_F(AnyTextObjectToStringCheck, BytesReturnsCorrectString) +{ + auto bytes_obj = py::bytes("test"); + EXPECT_STREQ(AnyTextObjectToString(bytes_obj).c_str(), "test"); +} + +TEST_F(AnyTextObjectToStringCheck, ByteArrayReturnsCorrectString) +{ + auto bytearray_obj = py::bytearray("test"); + EXPECT_STREQ(AnyTextObjectToString(bytearray_obj).c_str(), "test"); +} + +TEST_F(AnyTextObjectToStringCheck, NonTextReturnsEmptyString) +{ + auto non_text_obj = py::int_(42); + EXPECT_STREQ(AnyTextObjectToString(non_text_obj).c_str(), ""); +} + +using PyObjectToPyTextCheck = PyEnvCheck; + +TEST_F(PyObjectToPyTextCheck, UnicodeReturnsPyStr) +{ + PyObject* unicode_obj = PyUnicode_FromString("test"); + auto result = PyObjectToPyText(unicode_obj); + ASSERT_TRUE(result.has_value()); + EXPECT_TRUE(py::isinstance(result.value())); + Py_DECREF(unicode_obj); +} + +TEST_F(PyObjectToPyTextCheck, BytesReturnsPyBytes) +{ + PyObject* bytes_obj = PyBytes_FromString("test"); + auto result = PyObjectToPyText(bytes_obj); + ASSERT_TRUE(result.has_value()); + EXPECT_TRUE(py::isinstance(result.value())); + Py_DECREF(bytes_obj); +} + +TEST_F(PyObjectToPyTextCheck, ByteArrayReturnsPyByteArray) +{ + PyObject* bytes_obj = PyBytes_FromString("test"); + PyObject* bytearray_obj = PyByteArray_FromObject(bytes_obj); + auto result = PyObjectToPyText(bytearray_obj); + ASSERT_TRUE(result.has_value()); + EXPECT_TRUE(py::isinstance(result.value())); + Py_DECREF(bytes_obj); + Py_DECREF(bytearray_obj); +} + +TEST_F(PyObjectToPyTextCheck, NonTextReturnsEmptyOptional) +{ + PyObject* non_text_obj = PyLong_FromLong(42); + auto result = PyObjectToPyText(non_text_obj); + EXPECT_FALSE(result.has_value()); + Py_DECREF(non_text_obj); +} + +using GetPyTextTypeCheck = PyEnvCheck; + +TEST_F(GetPyTextTypeCheck, UnicodeReturnsUnicodeType) +{ + PyObject* unicode_obj = PyUnicode_FromString("test"); + EXPECT_EQ(get_pytext_type(unicode_obj), PyTextType::UNICODE); + Py_DECREF(unicode_obj); +} + +TEST_F(GetPyTextTypeCheck, BytesReturnsBytesType) +{ + PyObject* bytes_obj = PyBytes_FromString("test"); + EXPECT_EQ(get_pytext_type(bytes_obj), PyTextType::BYTES); + Py_DECREF(bytes_obj); +} + +TEST_F(GetPyTextTypeCheck, ByteArrayReturnsByteArrayType) +{ + PyObject* bytes_obj = PyBytes_FromString("test"); + PyObject* bytearray_obj = PyByteArray_FromObject(bytes_obj); + EXPECT_EQ(get_pytext_type(bytearray_obj), PyTextType::BYTEARRAY); + Py_DECREF(bytearray_obj); + Py_DECREF(bytes_obj); +} + +TEST_F(GetPyTextTypeCheck, NonTextReturnsOtherType) +{ + PyObject* non_text_obj = PyLong_FromLong(42); + EXPECT_EQ(get_pytext_type(non_text_obj), PyTextType::OTHER); + Py_DECREF(non_text_obj); +} + +using NewPyObjectIdCheck = PyEnvCheck; + +TEST_F(NewPyObjectIdCheck, ValidTaintedUnicodeReturnsNewId) +{ + PyObject* tainted_obj = PyUnicode_FromString("tainted"); + PyObject* new_id_obj = new_pyobject_id(tainted_obj); + + ASSERT_NE(new_id_obj, nullptr); + ASSERT_NE(tainted_obj, new_id_obj); + EXPECT_TRUE(PyUnicode_Check(new_id_obj)); + EXPECT_STREQ(PyUnicode_AsUTF8(tainted_obj), PyUnicode_AsUTF8(new_id_obj)); + + Py_DECREF(tainted_obj); + Py_DECREF(new_id_obj); +} + +TEST_F(NewPyObjectIdCheck, ValidTaintedBytesReturnsNewId) +{ + PyObject* tainted_obj = PyBytes_FromString("tainted"); + PyObject* new_id_obj = new_pyobject_id(tainted_obj); + + ASSERT_NE(new_id_obj, nullptr); + ASSERT_NE(tainted_obj, new_id_obj); + EXPECT_TRUE(PyBytes_Check(new_id_obj)); + EXPECT_STREQ(PyBytes_AsString(tainted_obj), PyBytes_AsString(new_id_obj)); + + Py_DECREF(tainted_obj); + Py_DECREF(new_id_obj); +} + +TEST_F(NewPyObjectIdCheck, ValidTaintedByteArrayReturnsNewId) +{ + PyObject* bytes_obj = PyBytes_FromString("tainted"); + PyObject* bytearray_obj = PyByteArray_FromObject(bytes_obj); + PyObject* new_id_obj = new_pyobject_id(bytearray_obj); + + ASSERT_NE(new_id_obj, nullptr); + ASSERT_NE(bytes_obj, new_id_obj); + ASSERT_NE(bytearray_obj, new_id_obj); + EXPECT_TRUE(PyByteArray_Check(new_id_obj)); + EXPECT_STREQ(PyByteArray_AsString(bytearray_obj), PyByteArray_AsString(new_id_obj)); + + Py_DECREF(bytearray_obj); + Py_DECREF(bytes_obj); +} + +TEST_F(NewPyObjectIdCheck, NullObjectReturnsNull) +{ + PyObject* new_id_obj = new_pyobject_id(nullptr); + EXPECT_EQ(new_id_obj, nullptr); +} + +using GetPyObjectSizeCheck = PyEnvCheck; + +TEST_F(GetPyObjectSizeCheck, UnicodeReturnsCorrectSize) +{ + PyObject* unicode_obj = PyUnicode_FromString("test"); + EXPECT_EQ(get_pyobject_size(unicode_obj), 4); + Py_DECREF(unicode_obj); +} + +TEST_F(GetPyObjectSizeCheck, NonSingleCodepointUnicodeReturnsCorrectSize) +{ + PyObject* unicode_obj = PyUnicode_FromString("𝄞𝄞"); // Musical symbol G clef (non-single codepoint) + EXPECT_EQ(get_pyobject_size(unicode_obj), 2); + Py_DECREF(unicode_obj); +} + +TEST_F(GetPyObjectSizeCheck, BytesReturnsCorrectSize) +{ + PyObject* bytes_obj = PyBytes_FromString("test"); + EXPECT_EQ(get_pyobject_size(bytes_obj), 4); + Py_DECREF(bytes_obj); +} + +TEST_F(GetPyObjectSizeCheck, NonSingleCodepointByteReturnsCorrectSize) +{ + PyObject* bytes_obj = PyBytes_FromString("𝄞𝄞"); + EXPECT_EQ(get_pyobject_size(bytes_obj), 8); + Py_DECREF(bytes_obj); +} + +TEST_F(GetPyObjectSizeCheck, ByteArrayReturnsCorrectSize) +{ + PyObject* bytes_obj = PyBytes_FromString("test"); + PyObject* bytearray_obj = PyByteArray_FromObject(bytes_obj); + EXPECT_EQ(get_pyobject_size(bytearray_obj), 4); + Py_DECREF(bytearray_obj); + Py_DECREF(bytes_obj); +} + +TEST_F(GetPyObjectSizeCheck, NonTextReturnsZero) +{ + PyObject* non_text_obj = PyLong_FromLong(42); + EXPECT_EQ(get_pyobject_size(non_text_obj), 0); + Py_DECREF(non_text_obj); +} diff --git a/hatch.toml b/hatch.toml index 16b90392797..594a87dde9b 100644 --- a/hatch.toml +++ b/hatch.toml @@ -264,6 +264,26 @@ flask = ["~=2.3"] python = ["3.8", "3.10", "3.12"] flask = ["~=3.0"] +## ASM Native IAST module + +[envs.appsec_iast_native] +template = "appsec_iast_native" +dependencies = [ + "cmake", + "pybind11", + "clang" +] + +[envs.appsec_iast_native.scripts] +test = [ + "cmake -DCMAKE_BUILD_TYPE=Debug -DPYTHON_EXECUTABLE=python -S ddtrace/appsec/_iast/_taint_tracking -B ddtrace/appsec/_iast/_taint_tracking", + "make -f ddtrace/appsec/_iast/_taint_tracking/tests/Makefile native_tests", + "ddtrace/appsec/_iast/_taint_tracking/tests/native_tests" +] + +[[envs.appsec_iast_native.matrix]] +python = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + ## ASM FastAPI [envs.appsec_threats_fastapi] diff --git a/tests/.suitespec.json b/tests/.suitespec.json index c6663a1c0a3..d9f98352541 100644 --- a/tests/.suitespec.json +++ b/tests/.suitespec.json @@ -633,6 +633,14 @@ "@remoteconfig", "tests/appsec/iast/*" ], + "appsec_iast_native": [ + "@bootstrap", + "@core", + "@tracing", + "@appsec", + "@appsec_iast", + "@remoteconfig" + ], "appsec_iast_memcheck": [ "@bootstrap", "@core", diff --git a/tests/appsec/iast/aspects/test_aspect_helpers.py b/tests/appsec/iast/aspects/test_aspect_helpers.py index 7e8a5a41230..d0ca09c4482 100644 --- a/tests/appsec/iast/aspects/test_aspect_helpers.py +++ b/tests/appsec/iast/aspects/test_aspect_helpers.py @@ -168,10 +168,10 @@ def test_set_ranges_on_splitted_ospathsplit(): def test_set_ranges_on_splitted_ospathsplitext(): s = "abc/efgh/jkl.txt" - range1 = _build_sample_range(0, 3, s[0:2]) - range2 = _build_sample_range(4, 4, s[4:8]) - range3 = _build_sample_range(9, 3, s[9:12]) - range4 = _build_sample_range(13, 4, s[13:17]) + range1 = _build_sample_range(0, 3, s[0:2]) # abc + range2 = _build_sample_range(4, 4, s[4:8]) # efgh + range3 = _build_sample_range(9, 3, s[9:12]) # jkl + range4 = _build_sample_range(13, 4, s[13:17]) # txt set_ranges(s, (range1, range2, range3, range4)) ranges = get_ranges(s) assert ranges @@ -185,7 +185,7 @@ def test_set_ranges_on_splitted_ospathsplitext(): TaintRange(9, 3, Source("jkl", "sample_value", OriginType.PARAMETER)), ] assert get_ranges(parts[1]) == [ - TaintRange(1, 4, Source("txt", "sample_value", OriginType.PARAMETER)), + TaintRange(1, 3, Source("txt", "sample_value", OriginType.PARAMETER)), ] @@ -426,7 +426,7 @@ def test_set_ranges_on_splitted_bytearray() -> None: assert get_ranges(parts[0]) == [TaintRange(0, 2, Source("ab", "sample_value", OriginType.PARAMETER))] assert get_ranges(parts[1]) == [ TaintRange(1, 1, Source("f", "sample_value", OriginType.PARAMETER)), - TaintRange(1, 4, Source("second", "sample_value", OriginType.PARAMETER)), + TaintRange(1, 3, Source("second", "sample_value", OriginType.PARAMETER)), ] assert get_ranges(parts[2]) == [TaintRange(0, 2, Source("third", "sample_value", OriginType.PARAMETER))] @@ -443,4 +443,4 @@ def test_set_ranges_on_splitted_wrong_args(): parts = ["123", 45] set_ranges_on_splitted(s, ranges, parts) ranges = get_ranges(parts[0]) - assert ranges == [TaintRange(1, 3, Source("123", "sample_value", OriginType.PARAMETER))] + assert ranges == [TaintRange(1, 2, Source("123", "sample_value", OriginType.PARAMETER))]