From be2d572a9633e4715836ea12cd37f7bfcab92d13 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Tue, 10 Sep 2024 13:24:43 +0200 Subject: [PATCH 01/24] Add google testing for the native IAST module Signed-off-by: Juanjo Alvarez --- .../_iast/_taint_tracking/CMakeLists.txt | 10 ++++++++++ .../_iast/_taint_tracking/tests/CMakeLists.txt | 18 ++++++++++++++++++ .../_taint_tracking/tests/test_example.cpp | 5 +++++ 3 files changed, 33 insertions(+) create mode 100644 ddtrace/appsec/_iast/_taint_tracking/tests/CMakeLists.txt create mode 100644 ddtrace/appsec/_iast/_taint_tracking/tests/test_example.cpp diff --git a/ddtrace/appsec/_iast/_taint_tracking/CMakeLists.txt b/ddtrace/appsec/_iast/_taint_tracking/CMakeLists.txt index 9352a431ba7..f5cece87149 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/CMakeLists.txt +++ b/ddtrace/appsec/_iast/_taint_tracking/CMakeLists.txt @@ -3,6 +3,7 @@ include(FetchContent) set(APP_NAME _native) option(BUILD_MACOS "Build for MacOS" OFF) +option(NATIVE_TESTING "Load test subdirectories and targets" ON) project(${APP_NAME}) @@ -33,6 +34,12 @@ else () message("Debug mode: not using abseil") endif () +FetchContent_Declare( + googletest + URL https://github.com/google/googletest/archive/refs/tags/release-1.12.1.zip +) +FetchContent_MakeAvailable(googletest) + include_directories(".") file(GLOB SOURCE_FILES "*.cpp" @@ -63,6 +70,9 @@ message(STATUS "Python_EXECUTABLE = ${Python_EXECUTABLE}") #message(STATUS "ICU_INCLUDE_DIRS = ${ICU_INCLUDE_DIRS}") add_subdirectory(_vendor/pybind11) +if (NATIVE_TESTING) + add_subdirectory(tests EXCLUDE_FROM_ALL) +endif() pybind11_add_module(_native SHARED ${SOURCE_FILES} ${HEADER_FILES}) get_filename_component(PARENT_DIR ${CMAKE_CURRENT_LIST_DIR} DIRECTORY) diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/CMakeLists.txt b/ddtrace/appsec/_iast/_taint_tracking/tests/CMakeLists.txt new file mode 100644 index 00000000000..a1923fd363a --- /dev/null +++ 
b/ddtrace/appsec/_iast/_taint_tracking/tests/CMakeLists.txt @@ -0,0 +1,18 @@ +include(GoogleTest) + +file(GLOB_RECURSE NATIVE_TEST_SOURCE *.cpp) +add_executable(native_test ${NATIVE_TEST_SOURCE}) + +set_target_properties(native_test PROPERTIES + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED YES + CXX_EXTENSIONS NO) + +target_include_directories(native_test PRIVATE ${NATIVE_PUBLIC_INCLUDES} ${NATIVE_PRIVATE_INCLUDES}) + +# Link GoogleTest +target_link_libraries(native_test gtest gtest_main) + +# Use gtest_discover_tests to automatically discover and run tests +gtest_discover_tests(native_test) + diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_example.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_example.cpp new file mode 100644 index 00000000000..6fc34389870 --- /dev/null +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_example.cpp @@ -0,0 +1,5 @@ +#include + +TEST(SampleTest, BasicAssertion) { + EXPECT_EQ(1 + 1, 2); +} From b62725bb9b039ab66736879268a94a6a1dd074bf Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Tue, 10 Sep 2024 14:48:10 +0200 Subject: [PATCH 02/24] Link unittests to pybind11 and Python Signed-off-by: Juanjo Alvarez --- .../_taint_tracking/tests/CMakeLists.txt | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/CMakeLists.txt b/ddtrace/appsec/_iast/_taint_tracking/tests/CMakeLists.txt index a1923fd363a..5d9b9846c31 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/CMakeLists.txt +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/CMakeLists.txt @@ -1,18 +1,21 @@ -include(GoogleTest) - -file(GLOB_RECURSE NATIVE_TEST_SOURCE *.cpp) -add_executable(native_test ${NATIVE_TEST_SOURCE}) +include(FetchContent) +FetchContent_Declare( + googletest + URL https://github.com/google/googletest/archive/refs/tags/release-1.11.0.zip +) +FetchContent_MakeAvailable(googletest) -set_target_properties(native_test PROPERTIES - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED 
YES - CXX_EXTENSIONS NO) +enable_testing() -target_include_directories(native_test PRIVATE ${NATIVE_PUBLIC_INCLUDES} ${NATIVE_PRIVATE_INCLUDES}) +include_directories(${PYTHON_INCLUDE_DIRS}) +include_directories(${PYBIND11_INCLUDE_DIRS}) -# Link GoogleTest -target_link_libraries(native_test gtest gtest_main) +file(GLOB TEST_SOURCES "*.cpp") +add_executable(native_tests ${TEST_SOURCES}) -# Use gtest_discover_tests to automatically discover and run tests -gtest_discover_tests(native_test) +# Link test executable against gtest & gtest_main +target_link_libraries(native_tests gtest gtest_main ${PYTHON_LIBRARIES} pybind11::module) +# Discover tests +include(GoogleTest) +gtest_discover_tests(native_tests) From cd3e5e1aa17331ffd2a86c9d62b74f74ff72cd8a Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Tue, 10 Sep 2024 15:10:27 +0200 Subject: [PATCH 03/24] Added another test file, some fixes Signed-off-by: Juanjo Alvarez --- .../_taint_tracking/Utils/StringUtils.cpp | 10 +- .../_iast/_taint_tracking/Utils/StringUtils.h | 8 ++ .../_taint_tracking/tests/CMakeLists.txt | 10 +- .../tests/test_stringutils.cpp | 113 ++++++++++++++++++ 4 files changed, 129 insertions(+), 12 deletions(-) create mode 100644 ddtrace/appsec/_iast/_taint_tracking/tests/test_stringutils.cpp diff --git a/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.cpp b/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.cpp index b21e8102345..226f66534f0 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.cpp @@ -10,14 +10,6 @@ using namespace pybind11::literals; using namespace std; -#define GET_HASH_KEY(hash) (hash & 0xFFFFFF) - -typedef struct _PyASCIIObject_State_Hidden -{ - unsigned int : 8; - unsigned int hidden : 24; -} PyASCIIObject_State_Hidden; - // Used to quickly exit on cases where the object is a non interned unicode // string and does not have the fast-taint mark on its internal data structure. 
// In any other case it will return false so the evaluation continue for (more @@ -46,7 +38,7 @@ is_notinterned_notfasttainted_unicode(const PyObject* objptr) return hash == -1 || e->hidden != GET_HASH_KEY(hash); } -// For non interned unicode strings, set a hidden mark on it's internsal data +// For non interned unicode strings, set a hidden mark on it's internal data // structure that will allow us to quickly check if the string is not tainted // and thus skip further processing without having to search on the tainting map __attribute__((flatten)) void diff --git a/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h b/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h index d00a52b5a3e..f495f538af5 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h +++ b/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h @@ -10,6 +10,14 @@ using namespace pybind11::literals; namespace py = pybind11; +#define GET_HASH_KEY(hash) (hash & 0xFFFFFF) + +typedef struct _PyASCIIObject_State_Hidden +{ + unsigned int : 8; + unsigned int hidden : 24; +} PyASCIIObject_State_Hidden; + enum class PyTextType { UNICODE = 0, diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/CMakeLists.txt b/ddtrace/appsec/_iast/_taint_tracking/tests/CMakeLists.txt index 5d9b9846c31..4d75a89c4d9 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/CMakeLists.txt +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/CMakeLists.txt @@ -11,10 +11,14 @@ include_directories(${PYTHON_INCLUDE_DIRS}) include_directories(${PYBIND11_INCLUDE_DIRS}) file(GLOB TEST_SOURCES "*.cpp") -add_executable(native_tests ${TEST_SOURCES}) +add_executable(native_tests ${TEST_SOURCES} ${SOURCE_FILES} ${HEADER_FILES}) + +if (CMAKE_BUILD_TYPE STREQUAL "Release") + target_link_libraries(native_tests gtest gtest_main ${PYTHON_LIBRARIES} pybind11::module absl::node_hash_map) +else() + target_link_libraries(native_tests gtest gtest_main ${PYTHON_LIBRARIES} pybind11::module) +endif () -# Link test executable 
against gtest & gtest_main -target_link_libraries(native_tests gtest gtest_main ${PYTHON_LIBRARIES} pybind11::module) # Discover tests include(GoogleTest) diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_stringutils.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_stringutils.cpp new file mode 100644 index 00000000000..006a764fb0d --- /dev/null +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_stringutils.cpp @@ -0,0 +1,113 @@ +#include +#include +#include +#include + +#include + +namespace py = pybind11; + +class PyEnvTest : public ::testing::Test +{ + protected: + void SetUp() override + { + // Initialize the Python interpreter for pybind11 + py::initialize_interpreter(); + } + + void TearDown() override + { + // Finalize the Python interpreter + py::finalize_interpreter(); + } +}; + +// get_unique_id === +TEST_F(PyEnvTest, TestGetUniqueId) +{ + PyObject* py_str = PyUnicode_FromString("test_string"); + auto expected_value = reinterpret_cast(py_str); + EXPECT_EQ(get_unique_id(py_str), expected_value); + + PyObject* nullobject = nullptr; + expected_value = reinterpret_cast(nullobject); + EXPECT_EQ(get_unique_id(nullobject), expected_value); + + Py_DECREF(py_str); +} + +// PyReMatch_Check === + +// Test case to check a valid `re.Match` object +TEST_F(PyEnvTest, TestPyReMatchValidMatchObject) +{ + py::object re_module = py::module_::import("re"); + py::object match_obj = re_module.attr("match")("a", "a"); + + ASSERT_TRUE(PyReMatch_Check(match_obj.ptr())); +} + +// Test case to check an invalid object (not `re.Match`) +TEST_F(PyEnvTest, TEstPyReMatchInvalidNonMatchObject) +{ + py::object non_match_obj = py::int_(42); // Not a `re.Match` object + + ASSERT_FALSE(PyReMatch_Check(non_match_obj.ptr())); +} + +// Test case to check a `None` (null) object +TEST_F(PyEnvTest, TEstPyReMatchNullObject) +{ + PyObject* null_obj = Py_None; + + ASSERT_FALSE(PyReMatch_Check(null_obj)); +} + +// set_fast_tainted_if_notinterned_unicode === +TEST_F(PyEnvTest, 
FastTaintedNullptrReturnsTrue) +{ + EXPECT_TRUE(is_notinterned_notfasttainted_unicode(nullptr)); +} + +TEST_F(PyEnvTest, FastTaintedNonUnicodeReturnsFalse) +{ + PyObject* non_unicode = PyLong_FromLong(42); + EXPECT_FALSE(is_notinterned_notfasttainted_unicode(non_unicode)); + Py_DECREF(non_unicode); +} + +TEST_F(PyEnvTest, FastTaintedInternedUnicodeReturnsTrue) +{ + PyObject* interned_unicode = PyUnicode_InternFromString("interned"); + EXPECT_TRUE(is_notinterned_notfasttainted_unicode(interned_unicode)); + Py_DECREF(interned_unicode); +} + +TEST_F(PyEnvTest, NonInternedUnicodeWithHashMinusOneReturnsTrue) +{ + PyObject* non_interned_unicode = PyUnicode_FromString("non_interned"); + reinterpret_cast(non_interned_unicode)->hash = -1; + EXPECT_TRUE(is_notinterned_notfasttainted_unicode(non_interned_unicode)); + Py_DECREF(non_interned_unicode); +} + +TEST_F(PyEnvTest, NonInternedUnicodeWithHiddenNotMatchingHashReturnsTrue) +{ + PyObject* non_interned_unicode = PyUnicode_FromString("non_interned"); + reinterpret_cast(non_interned_unicode)->hash = 12345; + reinterpret_cast<_PyASCIIObject_State_Hidden*>(&reinterpret_cast(non_interned_unicode)->state) + ->hidden = 54321; + EXPECT_TRUE(is_notinterned_notfasttainted_unicode(non_interned_unicode)); + Py_DECREF(non_interned_unicode); +} + +TEST_F(PyEnvTest, NonInternedUnicodeWithHiddenMatchingHashReturnsFalse) +{ + PyObject* non_interned_unicode = PyUnicode_FromString("non_interned"); + reinterpret_cast(non_interned_unicode)->hash = 12345; + reinterpret_cast<_PyASCIIObject_State_Hidden*>(&reinterpret_cast(non_interned_unicode)->state) + ->hidden = GET_HASH_KEY(12345); + EXPECT_FALSE(is_notinterned_notfasttainted_unicode(non_interned_unicode)); + Py_DECREF(non_interned_unicode); +} From 131f43d9a61ac639dcff604b9b0e42a6503e86fa Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Tue, 10 Sep 2024 17:33:15 +0200 Subject: [PATCH 04/24] Complete stringutils unittests Signed-off-by: Juanjo Alvarez --- 
.../_iast/_taint_tracking/Utils/StringUtils.h | 8 +- .../_taint_tracking/tests/test_example.cpp | 5 - .../tests/test_stringutils.cpp | 482 +++++++++++++++++- 3 files changed, 460 insertions(+), 35 deletions(-) delete mode 100644 ddtrace/appsec/_iast/_taint_tracking/tests/test_example.cpp diff --git a/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h b/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h index f495f538af5..13389d01767 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h +++ b/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h @@ -56,7 +56,7 @@ is_text(const PyObject* pyptr) inline bool is_tainteable(const PyObject* pyptr) { - return is_text(pyptr) || PyReMatch_Check(pyptr); + return pyptr != nullptr and (is_text(pyptr) or PyReMatch_Check(pyptr)); } // Base function for the variadic template @@ -104,7 +104,7 @@ StringToPyObject(const string& str, const PyTextType type) case PyTextType::BYTEARRAY: return py::bytearray(str); default: - return {}; + return py::none(); } } @@ -117,6 +117,10 @@ StringToPyObject(const char* str, const PyTextType type) inline string PyObjectToString(PyObject* obj) { + if (obj == nullptr or !PyUnicode_Check(obj)) { + return ""; + } + const char* str = PyUnicode_AsUTF8(obj); if (str == nullptr) { diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_example.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_example.cpp deleted file mode 100644 index 6fc34389870..00000000000 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/test_example.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include - -TEST(SampleTest, BasicAssertion) { - EXPECT_EQ(1 + 1, 2); -} diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_stringutils.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_stringutils.cpp index 006a764fb0d..0e15f9515d6 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/test_stringutils.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_stringutils.cpp @@ -7,24 +7,17 @@ namespace 
py = pybind11; -class PyEnvTest : public ::testing::Test +class PyEnvCheck : public ::testing::Test { protected: - void SetUp() override - { - // Initialize the Python interpreter for pybind11 - py::initialize_interpreter(); - } - - void TearDown() override - { - // Finalize the Python interpreter - py::finalize_interpreter(); - } + void SetUp() override { py::initialize_interpreter(); } + + void TearDown() override { py::finalize_interpreter(); } }; -// get_unique_id === -TEST_F(PyEnvTest, TestGetUniqueId) +using GetUniqueId = PyEnvCheck; + +TEST_F(GetUniqueId, TestGetUniqueId) { PyObject* py_str = PyUnicode_FromString("test_string"); auto expected_value = reinterpret_cast(py_str); @@ -37,10 +30,9 @@ TEST_F(PyEnvTest, TestGetUniqueId) Py_DECREF(py_str); } -// PyReMatch_Check === +using PyReMatchCheck = PyEnvCheck; -// Test case to check a valid `re.Match` object -TEST_F(PyEnvTest, TestPyReMatchValidMatchObject) +TEST_F(PyReMatchCheck, TestPyReMatchValidMatchObject) { py::object re_module = py::module_::import("re"); py::object match_obj = re_module.attr("match")("a", "a"); @@ -48,43 +40,42 @@ TEST_F(PyEnvTest, TestPyReMatchValidMatchObject) ASSERT_TRUE(PyReMatch_Check(match_obj.ptr())); } -// Test case to check an invalid object (not `re.Match`) -TEST_F(PyEnvTest, TEstPyReMatchInvalidNonMatchObject) +TEST_F(PyReMatchCheck, TEstPyReMatchInvalidNonMatchObject) { py::object non_match_obj = py::int_(42); // Not a `re.Match` object ASSERT_FALSE(PyReMatch_Check(non_match_obj.ptr())); } -// Test case to check a `None` (null) object -TEST_F(PyEnvTest, TEstPyReMatchNullObject) +TEST_F(PyReMatchCheck, TEstPyReMatchNullObject) { PyObject* null_obj = Py_None; ASSERT_FALSE(PyReMatch_Check(null_obj)); } -// set_fast_tainted_if_notinterned_unicode === -TEST_F(PyEnvTest, FastTaintedNullptrReturnsTrue) +using IsFastTaintedCheck = PyEnvCheck; + +TEST_F(IsFastTaintedCheck, FastTaintedNullptrReturnsTrue) { EXPECT_TRUE(is_notinterned_notfasttainted_unicode(nullptr)); } 
-TEST_F(PyEnvTest, FastTaintedNonUnicodeReturnsFalse) +TEST_F(IsFastTaintedCheck, FastTaintedNonUnicodeReturnsFalse) { PyObject* non_unicode = PyLong_FromLong(42); EXPECT_FALSE(is_notinterned_notfasttainted_unicode(non_unicode)); Py_DECREF(non_unicode); } -TEST_F(PyEnvTest, FastTaintedInternedUnicodeReturnsTrue) +TEST_F(IsFastTaintedCheck, FastTaintedInternedUnicodeReturnsTrue) { PyObject* interned_unicode = PyUnicode_InternFromString("interned"); EXPECT_TRUE(is_notinterned_notfasttainted_unicode(interned_unicode)); Py_DECREF(interned_unicode); } -TEST_F(PyEnvTest, NonInternedUnicodeWithHashMinusOneReturnsTrue) +TEST_F(IsFastTaintedCheck, NonInternedUnicodeWithHashMinusOneReturnsTrue) { PyObject* non_interned_unicode = PyUnicode_FromString("non_interned"); reinterpret_cast(non_interned_unicode)->hash = -1; @@ -92,7 +83,7 @@ TEST_F(PyEnvTest, NonInternedUnicodeWithHashMinusOneReturnsTrue) Py_DECREF(non_interned_unicode); } -TEST_F(PyEnvTest, NonInternedUnicodeWithHiddenNotMatchingHashReturnsTrue) +TEST_F(IsFastTaintedCheck, NonInternedUnicodeWithHiddenNotMatchingHashReturnsTrue) { PyObject* non_interned_unicode = PyUnicode_FromString("non_interned"); reinterpret_cast(non_interned_unicode)->hash = 12345; @@ -102,7 +93,7 @@ TEST_F(PyEnvTest, NonInternedUnicodeWithHiddenNotMatchingHashReturnsTrue) Py_DECREF(non_interned_unicode); } -TEST_F(PyEnvTest, NonInternedUnicodeWithHiddenMatchingHashReturnsFalse) +TEST_F(IsFastTaintedCheck, NonInternedUnicodeWithHiddenMatchingHashReturnsFalse) { PyObject* non_interned_unicode = PyUnicode_FromString("non_interned"); reinterpret_cast(non_interned_unicode)->hash = 12345; @@ -111,3 +102,438 @@ TEST_F(PyEnvTest, NonInternedUnicodeWithHiddenMatchingHashReturnsFalse) EXPECT_FALSE(is_notinterned_notfasttainted_unicode(non_interned_unicode)); Py_DECREF(non_interned_unicode); } + +using SetFastTaintedCheck = PyEnvCheck; + +TEST_F(SetFastTaintedCheck, NullptrDoesNothing) +{ + set_fast_tainted_if_notinterned_unicode(nullptr); + // No 
assertion needed, just ensure no crash +} + +TEST_F(SetFastTaintedCheck, NonUnicodeDoesNothing) +{ + PyObject* non_unicode = PyLong_FromLong(42); + set_fast_tainted_if_notinterned_unicode(non_unicode); + // No assertion needed, just ensure no crash + Py_DECREF(non_unicode); +} + +TEST_F(SetFastTaintedCheck, InternedUnicodeDoesNothing) +{ + PyObject* interned_unicode = PyUnicode_InternFromString("interned"); + set_fast_tainted_if_notinterned_unicode(interned_unicode); + // No assertion needed, just ensure no crash + Py_DECREF(interned_unicode); +} + +TEST_F(SetFastTaintedCheck, NonInternedUnicodeSetsHidden) +{ + PyObject* non_interned_unicode = PyUnicode_FromString("non_interned"); + reinterpret_cast(non_interned_unicode)->hash = 12345; + set_fast_tainted_if_notinterned_unicode(non_interned_unicode); + const _PyASCIIObject_State_Hidden* e = + (_PyASCIIObject_State_Hidden*)&(((PyASCIIObject*)non_interned_unicode)->state); + EXPECT_EQ(e->hidden, GET_HASH_KEY(12345)); + Py_DECREF(non_interned_unicode); +} + +TEST_F(SetFastTaintedCheck, NonInternedUnicodeWithHashMinusOneSetsHidden) +{ + PyObject* non_interned_unicode = PyUnicode_FromString("non_interned"); + reinterpret_cast(non_interned_unicode)->hash = -1; + set_fast_tainted_if_notinterned_unicode(non_interned_unicode); + Py_hash_t hash = PyObject_Hash(non_interned_unicode); + const _PyASCIIObject_State_Hidden* e = + (_PyASCIIObject_State_Hidden*)&(((PyASCIIObject*)non_interned_unicode)->state); + EXPECT_EQ(e->hidden, GET_HASH_KEY(hash)); + Py_DECREF(non_interned_unicode); +} + +using IsTextCheck = PyEnvCheck; + +TEST_F(IsTextCheck, NullptrReturnsFalse) +{ + EXPECT_FALSE(is_text(nullptr)); +} + +TEST_F(IsTextCheck, UnicodeReturnsTrue) +{ + PyObject* unicode_obj = PyUnicode_FromString("test"); + EXPECT_TRUE(is_text(unicode_obj)); + Py_DECREF(unicode_obj); +} + +TEST_F(IsTextCheck, BytesReturnsTrue) +{ + PyObject* bytes_obj = PyBytes_FromString("test"); + EXPECT_TRUE(is_text(bytes_obj)); + Py_DECREF(bytes_obj); +} + 
+TEST_F(IsTextCheck, ByteArrayReturnsTrue) +{ + PyObject* bytes_obj = PyBytes_FromString("test"); + PyObject* bytearray_obj = PyByteArray_FromObject(bytes_obj); + EXPECT_TRUE(is_text(bytearray_obj)); + Py_DECREF(bytearray_obj); + Py_DECREF(bytes_obj); +} + +TEST_F(IsTextCheck, NonTextReturnsFalse) +{ + PyObject* non_text_obj = PyLong_FromLong(42); + EXPECT_FALSE(is_text(non_text_obj)); + Py_DECREF(non_text_obj); +} + +using IsTainteableCheck = PyEnvCheck; + +TEST_F(IsTainteableCheck, NullptrReturnsFalse) +{ + EXPECT_FALSE(is_tainteable(nullptr)); +} + +TEST_F(IsTainteableCheck, UnicodeReturnsTrue) +{ + PyObject* unicode_obj = PyUnicode_FromString("test"); + EXPECT_TRUE(is_tainteable(unicode_obj)); + Py_DECREF(unicode_obj); +} + +TEST_F(IsTainteableCheck, BytesReturnsTrue) +{ + PyObject* bytes_obj = PyBytes_FromString("test"); + EXPECT_TRUE(is_tainteable(bytes_obj)); + Py_DECREF(bytes_obj); +} + +TEST_F(IsTainteableCheck, ByteArrayReturnsTrue) +{ + PyObject* bytes_obj = PyBytes_FromString("test"); + PyObject* bytearray_obj = PyByteArray_FromObject(bytes_obj); + EXPECT_TRUE(is_tainteable(bytearray_obj)); + Py_DECREF(bytearray_obj); + Py_DECREF(bytes_obj); +} + +TEST_F(IsTainteableCheck, NonTextReturnsFalse) +{ + PyObject* non_text_obj = PyLong_FromLong(42); + EXPECT_FALSE(is_tainteable(non_text_obj)); + Py_DECREF(non_text_obj); +} + +TEST_F(IsTainteableCheck, ReMatchReturnsTrue) +{ + py::object re = py::module_::import("re"); + py::object match = re.attr("match")("a", "a"); + EXPECT_TRUE(is_tainteable(match.ptr())); +} + +using ArgsAreTextAndSameTypeCheck = PyEnvCheck; + +TEST_F(ArgsAreTextAndSameTypeCheck, NullptrReturnsFalse) +{ + EXPECT_FALSE(args_are_text_and_same_type(nullptr, nullptr)); +} + +TEST_F(ArgsAreTextAndSameTypeCheck, MixedTypesReturnFalse) +{ + PyObject* unicode_obj = PyUnicode_FromString("test"); + PyObject* bytes_obj = PyBytes_FromString("test"); + EXPECT_FALSE(args_are_text_and_same_type(unicode_obj, bytes_obj)); + Py_DECREF(unicode_obj); + 
Py_DECREF(bytes_obj); +} + +TEST_F(ArgsAreTextAndSameTypeCheck, AllUnicodeReturnsTrue) +{ + PyObject* unicode_obj1 = PyUnicode_FromString("test1"); + PyObject* unicode_obj2 = PyUnicode_FromString("test2"); + EXPECT_TRUE(args_are_text_and_same_type(unicode_obj1, unicode_obj2)); + Py_DECREF(unicode_obj1); + Py_DECREF(unicode_obj2); +} + +TEST_F(ArgsAreTextAndSameTypeCheck, AllBytesReturnsTrue) +{ + PyObject* bytes_obj1 = PyBytes_FromString("test1"); + PyObject* bytes_obj2 = PyBytes_FromString("test2"); + EXPECT_TRUE(args_are_text_and_same_type(bytes_obj1, bytes_obj2)); + Py_DECREF(bytes_obj1); + Py_DECREF(bytes_obj2); +} + +TEST_F(ArgsAreTextAndSameTypeCheck, AllByteArrayReturnsTrue) +{ + PyObject* bytes_obj1 = PyBytes_FromString("test1"); + PyObject* bytearray_obj1 = PyByteArray_FromObject(bytes_obj1); + PyObject* bytes_obj2 = PyBytes_FromString("test2"); + PyObject* bytearray_obj2 = PyByteArray_FromObject(bytes_obj2); + EXPECT_TRUE(args_are_text_and_same_type(bytearray_obj1, bytearray_obj2)); + Py_DECREF(bytearray_obj1); + Py_DECREF(bytearray_obj2); + Py_DECREF(bytes_obj1); + Py_DECREF(bytes_obj2); +} + +TEST_F(ArgsAreTextAndSameTypeCheck, MixedTextTypesReturnFalse) +{ + PyObject* unicode_obj = PyUnicode_FromString("test"); + PyObject* bytes_obj = PyBytes_FromString("test"); + PyObject* bytearray_obj = PyByteArray_FromObject(bytes_obj); + EXPECT_FALSE(args_are_text_and_same_type(unicode_obj, bytes_obj, bytearray_obj)); + Py_DECREF(unicode_obj); + Py_DECREF(bytes_obj); + Py_DECREF(bytearray_obj); +} + +using PyObjectToStringCheck = PyEnvCheck; + +TEST_F(PyObjectToStringCheck, NullptrReturnsEmptyString) +{ + EXPECT_EQ(PyObjectToString(nullptr), string("")); +} + +TEST_F(PyObjectToStringCheck, UnicodeReturnsCorrectString) +{ + PyObject* unicode_obj = PyUnicode_FromString("test"); + EXPECT_EQ(PyObjectToString(unicode_obj), string("test")); + Py_DECREF(unicode_obj); +} + +TEST_F(PyObjectToStringCheck, NonUnicodeReturnsEmptyString) +{ + PyObject* non_unicode_obj = 
PyLong_FromLong(42); + EXPECT_EQ(PyObjectToString(non_unicode_obj), string("")); + Py_DECREF(non_unicode_obj); +} + +using StringToPyObjectCheck = PyEnvCheck; + +TEST_F(StringToPyObjectCheck, ConvertsToUnicode) +{ + std::string test_str = "test"; + py::object py_obj = StringToPyObject(test_str, PyTextType::UNICODE); + EXPECT_TRUE(PyUnicode_Check(py_obj.ptr())); + EXPECT_EQ(PyUnicode_AsUTF8(py_obj.ptr()), test_str); +} + +TEST_F(StringToPyObjectCheck, ConvertsToBytes) +{ + std::string test_str = "test"; + py::object py_obj = StringToPyObject(test_str, PyTextType::BYTES); + EXPECT_TRUE(PyBytes_Check(py_obj.ptr())); + EXPECT_EQ(PyBytes_AsString(py_obj.ptr()), test_str); +} + +TEST_F(StringToPyObjectCheck, ConvertsToByteArray) +{ + std::string test_str = "test"; + py::object py_obj = StringToPyObject(test_str, PyTextType::BYTEARRAY); + EXPECT_TRUE(PyByteArray_Check(py_obj.ptr())); + EXPECT_EQ(PyByteArray_AsString(py_obj.ptr()), test_str); +} + +TEST_F(StringToPyObjectCheck, InvalidTypeReturnsNone) +{ + std::string test_str = "test"; + py::object py_obj = StringToPyObject(test_str, PyTextType::OTHER); + EXPECT_TRUE(py_obj.is_none()); +} + +using AnyTextObjectToStringCheck = PyEnvCheck; + +TEST_F(AnyTextObjectToStringCheck, UnicodeReturnsCorrectString) +{ + auto unicode_obj = py::str("test"); + EXPECT_EQ(AnyTextObjectToString(unicode_obj), string("test")); +} + +TEST_F(AnyTextObjectToStringCheck, BytesReturnsCorrectString) +{ + auto bytes_obj = py::bytes("test"); + EXPECT_EQ(AnyTextObjectToString(bytes_obj), string("test")); +} + +TEST_F(AnyTextObjectToStringCheck, ByteArrayReturnsCorrectString) +{ + auto bytearray_obj = py::bytearray("test"); + EXPECT_EQ(AnyTextObjectToString(bytearray_obj), string("test")); +} + +TEST_F(AnyTextObjectToStringCheck, NonTextReturnsEmptyString) +{ + auto non_text_obj = py::int_(42); + EXPECT_EQ(AnyTextObjectToString(non_text_obj), string("")); +} + +using PyObjectToPyTextCheck = PyEnvCheck; + +TEST_F(PyObjectToPyTextCheck, 
UnicodeReturnsPyStr) +{ + PyObject* unicode_obj = PyUnicode_FromString("test"); + auto result = PyObjectToPyText(unicode_obj); + ASSERT_TRUE(result.has_value()); + EXPECT_TRUE(py::isinstance(result.value())); + Py_DECREF(unicode_obj); +} + +TEST_F(PyObjectToPyTextCheck, BytesReturnsPyBytes) +{ + PyObject* bytes_obj = PyBytes_FromString("test"); + auto result = PyObjectToPyText(bytes_obj); + ASSERT_TRUE(result.has_value()); + EXPECT_TRUE(py::isinstance(result.value())); + Py_DECREF(bytes_obj); +} + +TEST_F(PyObjectToPyTextCheck, ByteArrayReturnsPyByteArray) +{ + PyObject* bytes_obj = PyBytes_FromString("test"); + PyObject* bytearray_obj = PyByteArray_FromObject(bytes_obj); + auto result = PyObjectToPyText(bytearray_obj); + ASSERT_TRUE(result.has_value()); + EXPECT_TRUE(py::isinstance(result.value())); + Py_DECREF(bytes_obj); + Py_DECREF(bytearray_obj); +} + +TEST_F(PyObjectToPyTextCheck, NonTextReturnsEmptyOptional) +{ + PyObject* non_text_obj = PyLong_FromLong(42); + auto result = PyObjectToPyText(non_text_obj); + EXPECT_FALSE(result.has_value()); + Py_DECREF(non_text_obj); +} + +using GetPyTextTypeCheck = PyEnvCheck; + +TEST_F(GetPyTextTypeCheck, UnicodeReturnsUnicodeType) +{ + PyObject* unicode_obj = PyUnicode_FromString("test"); + EXPECT_EQ(get_pytext_type(unicode_obj), PyTextType::UNICODE); + Py_DECREF(unicode_obj); +} + +TEST_F(GetPyTextTypeCheck, BytesReturnsBytesType) +{ + PyObject* bytes_obj = PyBytes_FromString("test"); + EXPECT_EQ(get_pytext_type(bytes_obj), PyTextType::BYTES); + Py_DECREF(bytes_obj); +} + +TEST_F(GetPyTextTypeCheck, ByteArrayReturnsByteArrayType) +{ + PyObject* bytes_obj = PyBytes_FromString("test"); + PyObject* bytearray_obj = PyByteArray_FromObject(bytes_obj); + EXPECT_EQ(get_pytext_type(bytearray_obj), PyTextType::BYTEARRAY); + Py_DECREF(bytearray_obj); + Py_DECREF(bytes_obj); +} + +TEST_F(GetPyTextTypeCheck, NonTextReturnsOtherType) +{ + PyObject* non_text_obj = PyLong_FromLong(42); + EXPECT_EQ(get_pytext_type(non_text_obj), 
PyTextType::OTHER); + Py_DECREF(non_text_obj); +} + +using NewPyObjectIdCheck = PyEnvCheck; + +TEST_F(NewPyObjectIdCheck, ValidTaintedUnicodeReturnsNewId) +{ + PyObject* tainted_obj = PyUnicode_FromString("tainted"); + PyObject* new_id_obj = new_pyobject_id(tainted_obj); + + ASSERT_NE(new_id_obj, nullptr); + ASSERT_NE(tainted_obj, new_id_obj); + EXPECT_TRUE(PyUnicode_Check(new_id_obj)); + EXPECT_STREQ(PyUnicode_AsUTF8(tainted_obj), PyUnicode_AsUTF8(new_id_obj)); + + Py_DECREF(tainted_obj); + Py_DECREF(new_id_obj); +} + +TEST_F(NewPyObjectIdCheck, ValidTaintedBytesReturnsNewId) +{ + PyObject* tainted_obj = PyBytes_FromString("tainted"); + PyObject* new_id_obj = new_pyobject_id(tainted_obj); + + ASSERT_NE(new_id_obj, nullptr); + ASSERT_NE(tainted_obj, new_id_obj); + EXPECT_TRUE(PyBytes_Check(new_id_obj)); + EXPECT_STREQ(PyBytes_AsString(tainted_obj), PyBytes_AsString(new_id_obj)); + + Py_DECREF(tainted_obj); + Py_DECREF(new_id_obj); +} + +TEST_F(NewPyObjectIdCheck, ValidTaintedByteArrayReturnsNewId) +{ + PyObject* bytes_obj = PyBytes_FromString("tainted"); + PyObject* bytearray_obj = PyByteArray_FromObject(bytes_obj); + PyObject* new_id_obj = new_pyobject_id(bytearray_obj); + + ASSERT_NE(new_id_obj, nullptr); + ASSERT_NE(bytes_obj, new_id_obj); + ASSERT_NE(bytearray_obj, new_id_obj); + EXPECT_TRUE(PyByteArray_Check(new_id_obj)); + EXPECT_STREQ(PyByteArray_AsString(bytearray_obj), PyByteArray_AsString(new_id_obj)); + + Py_DECREF(bytearray_obj); + Py_DECREF(bytes_obj); +} + +TEST_F(NewPyObjectIdCheck, NullObjectReturnsNull) +{ + PyObject* new_id_obj = new_pyobject_id(nullptr); + EXPECT_EQ(new_id_obj, nullptr); +} + +using GetPyObjectSizeCheck = PyEnvCheck; + +TEST_F(GetPyObjectSizeCheck, UnicodeReturnsCorrectSize) +{ + PyObject* unicode_obj = PyUnicode_FromString("test"); + EXPECT_EQ(get_pyobject_size(unicode_obj), 4); + Py_DECREF(unicode_obj); +} + +TEST_F(GetPyObjectSizeCheck, NonSingleCodepointUnicodeReturnsCorrectSize) +{ + PyObject* unicode_obj = 
PyUnicode_FromString("𝄞𝄞"); // Musical symbol G clef (non-single codepoint) + EXPECT_EQ(get_pyobject_size(unicode_obj), 2); + Py_DECREF(unicode_obj); +} + +TEST_F(GetPyObjectSizeCheck, BytesReturnsCorrectSize) +{ + PyObject* bytes_obj = PyBytes_FromString("test"); + EXPECT_EQ(get_pyobject_size(bytes_obj), 4); + Py_DECREF(bytes_obj); +} + +TEST_F(GetPyObjectSizeCheck, NonSingleCodepointByteReturnsCorrectSize) +{ + PyObject* bytes_obj = PyBytes_FromString("𝄞𝄞"); + EXPECT_EQ(get_pyobject_size(bytes_obj), 8); + Py_DECREF(bytes_obj); +} + +TEST_F(GetPyObjectSizeCheck, ByteArrayReturnsCorrectSize) +{ + PyObject* bytes_obj = PyBytes_FromString("test"); + PyObject* bytearray_obj = PyByteArray_FromObject(bytes_obj); + EXPECT_EQ(get_pyobject_size(bytearray_obj), 4); + Py_DECREF(bytearray_obj); + Py_DECREF(bytes_obj); +} + +TEST_F(GetPyObjectSizeCheck, NonTextReturnsZero) +{ + PyObject* non_text_obj = PyLong_FromLong(42); + EXPECT_EQ(get_pyobject_size(non_text_obj), 0); + Py_DECREF(non_text_obj); +} From 8f81334cca3e67491e4a618eefbf54ebc21fb64b Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Tue, 10 Sep 2024 18:37:21 +0200 Subject: [PATCH 05/24] checkpoint Signed-off-by: Juanjo Alvarez --- .../_iast/_taint_tracking/Aspects/Helpers.h | 27 +++- .../_taint_tracking/tests/test_helpers.cpp | 140 ++++++++++++++++++ 2 files changed, 160 insertions(+), 7 deletions(-) create mode 100644 ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h index 200c3cd70c5..11e45c796f9 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h @@ -93,11 +93,10 @@ inline string get_tag(const string& content) { if (content.empty()) { - return string(EVIDENCE_MARKS::BLANK); + return { EVIDENCE_MARKS::BLANK }; } - auto result = string(EVIDENCE_MARKS::LESS) + content + string(EVIDENCE_MARKS::GREATER); - 
return result; + return string(EVIDENCE_MARKS::LESS) + content + string(EVIDENCE_MARKS::GREATER); } inline string @@ -112,19 +111,33 @@ get_default_content(const TaintRangePtr& taint_range) // TODO OPTIMIZATION: check if we can use instead a struct object with range_guid_map, new_ranges and default members so // we dont have to get the keys by string +/** + * @brief Replaces a taint range with a new range from the provided dictionary. + * + * This function takes a `TaintRangePtr` and an optional dictionary of new ranges. + * If the `taint_range` is found in the dictionary, it is replaced with the corresponding new range. + * If the `taint_range` is not found or if `new_ranges` is null, an empty string is returned. + * + * @param taint_range A shared pointer to the original taint range. + * @param new_ranges An optional dictionary containing new taint ranges. + * @return A string representation of the hash of the new taint range if replaced, otherwise an empty string. + */ inline string mapper_replace(const TaintRangePtr& taint_range, const optional& new_ranges) { - if (!taint_range or !new_ranges) { + + if (!taint_range or !new_ranges.has_value() or py::len(new_ranges.value()) == 0) { return {}; } + + const py::dict& new_ranges_value = new_ranges.value(); py::object o = py::cast(taint_range); - if (!new_ranges->contains(o)) { + if (!new_ranges_value.contains(o)) { return {}; } - const TaintRange new_range = py::cast((*new_ranges)[o]); - return to_string(new_range.get_hash()); + const TaintRangePtr new_range = py::cast(new_ranges_value[o]); + return to_string(new_range->get_hash()); } inline PyObject* diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp new file mode 100644 index 00000000000..7c23ac32656 --- /dev/null +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp @@ -0,0 +1,140 @@ +#include +#include +#include // JJJ +#include +#include + +#include + +namespace py = 
pybind11; + +class PyEnvCheck : public ::testing::Test +{ + protected: + void SetUp() override { py::initialize_interpreter(); } + + void TearDown() override { py::finalize_interpreter(); } +}; + +using HasPyErrCheck = PyEnvCheck; + +TEST_F(HasPyErrCheck, NoErrorReturnsFalse) +{ + EXPECT_FALSE(has_pyerr()); + EXPECT_EQ(has_pyerr_as_string(), ""); +} + +TEST_F(HasPyErrCheck, ErrorReturnsTrue) +{ + PyErr_SetString(PyExc_RuntimeError, "Test error"); + EXPECT_TRUE(has_pyerr()); + EXPECT_EQ(has_pyerr_as_string(), "Test error"); + PyErr_Clear(); +} + +TEST_F(HasPyErrCheck, ClearError) +{ + PyErr_SetString(PyExc_RuntimeError, "Test error"); + EXPECT_TRUE(has_pyerr()); + EXPECT_EQ(has_pyerr_as_string(), "Test error"); + + // Clear the error + PyErr_Clear(); + EXPECT_FALSE(has_pyerr()); + EXPECT_EQ(has_pyerr_as_string(), ""); +} + +using GetTagCheck = ::testing::Test; + +TEST_F(GetTagCheck, HandlesEmptyString) +{ + std::string input = ""; + std::string expected_output = EVIDENCE_MARKS::BLANK; + EXPECT_EQ(get_tag(input), expected_output); +} + +TEST_F(GetTagCheck, HandlesNonEmptyString) +{ + std::string input = "example"; + std::string expected_output = std::string(EVIDENCE_MARKS::LESS) + "example" + std::string(EVIDENCE_MARKS::GREATER); + EXPECT_EQ(get_tag(input), expected_output); +} + +TEST_F(GetTagCheck, HandlesSpecialCharacters) +{ + std::string input = "special!@#"; + std::string expected_output = + std::string(EVIDENCE_MARKS::LESS) + "special!@#" + std::string(EVIDENCE_MARKS::GREATER); + EXPECT_EQ(get_tag(input), expected_output); +} +using GetDefaultContentCheck = ::testing::Test; + +TEST_F(GetDefaultContentCheck, HandlesEmptySourceName) +{ + TaintRangePtr taint_range = std::make_shared(); + taint_range->source.name = ""; + std::string expected_output = ""; + EXPECT_EQ(get_default_content(taint_range), expected_output); +} + +TEST_F(GetDefaultContentCheck, HandlesNonEmptySourceName) +{ + TaintRangePtr taint_range = std::make_shared(); + taint_range->source.name = 
"example"; + std::string expected_output = "example"; + EXPECT_EQ(get_default_content(taint_range), expected_output); +} + +TEST_F(GetDefaultContentCheck, HandlesSpecialCharactersInSourceName) +{ + TaintRangePtr taint_range = std::make_shared(); + taint_range->source.name = "special!@#"; + std::string expected_output = "special!@#"; + EXPECT_EQ(get_default_content(taint_range), expected_output); +} + +using MapperReplaceCheck = PyEnvCheck; + +TEST_F(MapperReplaceCheck, HandlesNullTaintRange) +{ + optional new_ranges = py::dict(); + EXPECT_EQ(mapper_replace(nullptr, new_ranges), ""); +} + +TEST_F(MapperReplaceCheck, HandlesNullNewRanges) +{ + TaintRangePtr taint_range = std::make_shared(); + EXPECT_EQ(mapper_replace(taint_range, nullopt), ""); +} + +TEST_F(MapperReplaceCheck, HandlesNonExistingRange) +{ + TaintRangePtr taint_range = std::make_shared(); + optional new_ranges = py::dict(); + EXPECT_EQ(mapper_replace(taint_range, new_ranges), ""); +} + +TEST_F(MapperReplaceCheck, HandlesExistingRange) +{ + cerr << "JJJ 1\n"; + TaintRangePtr taint_range = std::make_shared(); + cerr << "JJJ 2\n"; + taint_range->start = 0; + taint_range->length = 5; + taint_range->source.name = "example"; + cerr << "JJJ 3\n"; + + TaintRangePtr new_range = std::make_shared(); + cerr << "JJJ 4\n"; + new_range->start = 0; + new_range->length = 5; + new_range->source.name = "new_example"; + cerr << "JJJ 5\n"; + + py::dict new_ranges; + cerr << "JJJ 5.1\n"; + new_ranges[py::cast(taint_range)] = py::cast(new_range); + cerr << "JJJ 7\n"; + + EXPECT_EQ(mapper_replace(taint_range, new_ranges), std::to_string(new_range->get_hash())); +} \ No newline at end of file From f50cbf3f9b13d008faf1e09cc888ce1ee512fa9d Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Tue, 10 Sep 2024 18:52:05 +0200 Subject: [PATCH 06/24] checkpoint Signed-off-by: Juanjo Alvarez --- .../_iast/_taint_tracking/Aspects/Helpers.cpp | 16 ------- .../_iast/_taint_tracking/Aspects/Helpers.h | 21 +++++++- 
.../_taint_tracking/tests/test_helpers.cpp | 48 +++++++++++++++---- 3 files changed, 58 insertions(+), 27 deletions(-) diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp index 23e43332f18..dc16e699295 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp @@ -200,22 +200,6 @@ api_convert_escaped_text_to_taint_text(PyObject* taint_escaped_text, } } -unsigned long int -getNum(const std::string& s) -{ - unsigned int n = -1; - try { - n = std::stoul(s, nullptr, 10); - if (errno != 0) { - PyErr_Print(); - } - } catch (std::exception&) { - // throw std::invalid_argument("Value is too big"); - PyErr_Print(); - } - return n; -} - template std::tuple convert_escaped_text_to_taint_text(const StrType& taint_escaped_text, const TaintRangeRefs& ranges_orig) diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h index 11e45c796f9..5936d2d7f26 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h @@ -136,8 +136,25 @@ mapper_replace(const TaintRangePtr& taint_range, const optional& if (!new_ranges_value.contains(o)) { return {}; } - const TaintRangePtr new_range = py::cast(new_ranges_value[o]); - return to_string(new_range->get_hash()); + const TaintRange new_range = py::cast((*new_ranges)[o]); + return to_string(new_range.get_hash()); +} + +// FIXME: maybe using an "unsigned" -1 as flag is not the best idea... 
+inline unsigned long int +getNum(const std::string& s) +{ + unsigned long int n = -1; + try { + n = std::stoul(s, nullptr, 10); + if (errno != 0) { + PyErr_Print(); + } + } catch (std::exception&) { + // throw std::invalid_argument("Value is too big"); + PyErr_Print(); + } + return n; } inline PyObject* diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp index 7c23ac32656..94391bd185b 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp @@ -1,6 +1,5 @@ #include #include -#include // JJJ #include #include @@ -114,27 +113,58 @@ TEST_F(MapperReplaceCheck, HandlesNonExistingRange) EXPECT_EQ(mapper_replace(taint_range, new_ranges), ""); } -TEST_F(MapperReplaceCheck, HandlesExistingRange) +// FIXME: not working, check with Alberto +TEST_F(MapperReplaceCheck, DISABLED_HandlesExistingRange) { - cerr << "JJJ 1\n"; TaintRangePtr taint_range = std::make_shared(); - cerr << "JJJ 2\n"; taint_range->start = 0; taint_range->length = 5; taint_range->source.name = "example"; - cerr << "JJJ 3\n"; TaintRangePtr new_range = std::make_shared(); - cerr << "JJJ 4\n"; new_range->start = 0; new_range->length = 5; new_range->source.name = "new_example"; - cerr << "JJJ 5\n"; py::dict new_ranges; - cerr << "JJJ 5.1\n"; new_ranges[py::cast(taint_range)] = py::cast(new_range); - cerr << "JJJ 7\n"; EXPECT_EQ(mapper_replace(taint_range, new_ranges), std::to_string(new_range->get_hash())); +} + +using GetNumTest = PyEnvCheck; + +TEST_F(GetNumTest, ValidNumber) +{ + std::string valid_str = "12345"; + unsigned long int result = getNum(valid_str); + EXPECT_EQ(result, 12345); +} + +TEST_F(GetNumTest, EmptyString) +{ + std::string empty_str = ""; + unsigned long int result = getNum(empty_str); + EXPECT_EQ(result, static_cast(-1)); +} + +TEST_F(GetNumTest, InvalidString) +{ + std::string invalid_str = "abc"; + unsigned long int result = 
getNum(invalid_str); + EXPECT_EQ(result, static_cast(-1)); +} + +TEST_F(GetNumTest, OutOfRangeNumber) +{ + std::string out_of_range_str = "999999999999999999999999"; + unsigned long int result = getNum(out_of_range_str); + EXPECT_EQ(result, static_cast(-1)); // Should return -1 due to exception +} + +TEST_F(GetNumTest, MaxUnsignedLong) +{ + std::string max_ulong_str = std::to_string(ULONG_MAX); + unsigned long int result = getNum(max_ulong_str); + EXPECT_EQ(result, ULONG_MAX); } \ No newline at end of file From e8871ea6d3c0079612086d9819b97d9abcddf316 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Wed, 11 Sep 2024 12:14:10 +0200 Subject: [PATCH 07/24] more tests and fixes Signed-off-by: Juanjo Alvarez --- .../_iast/_taint_tracking/Aspects/Helpers.cpp | 45 ++-- .../_iast/_taint_tracking/Aspects/Helpers.h | 18 +- .../_iast/_taint_tracking/CMakeLists.txt | 8 +- .../_iast/_taint_tracking/Utils/StringUtils.h | 5 +- .../_taint_tracking/tests/test_helpers.cpp | 211 ++++++++++++++++-- .../tests/test_stringutils.cpp | 20 +- 6 files changed, 240 insertions(+), 67 deletions(-) diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp index dc16e699295..7f478780278 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp @@ -44,7 +44,11 @@ as_formatted_evidence(const string& text, const optional& tag_mapping_mode, const optional& new_ranges) { - if (text_ranges.empty()) { + if (const auto tx_map = Initializer::get_tainting_map(); !tx_map) { + return text; + } + + if (text_ranges.empty() or text.empty()) { return text; } vector res_vector; @@ -55,20 +59,23 @@ as_formatted_evidence(const string& text, for (const auto& taint_range : text_ranges) { string content; - if (!tag_mapping_mode) { + if (!tag_mapping_mode or tag_mapping_mode.value() == TagMappingMode::Normal) { content = get_default_content(taint_range); } else switch 
(*tag_mapping_mode) { - case TagMappingMode::Mapper: + case TagMappingMode::Mapper: { content = to_string(taint_range->get_hash()); break; - case TagMappingMode::Mapper_Replace: + } + case TagMappingMode::Mapper_Replace: { content = mapper_replace(taint_range, new_ranges); break; + } default: { // Nothing } } + const auto tag = get_tag(content); const auto range_end = taint_range->start + taint_range->length; @@ -91,21 +98,6 @@ as_formatted_evidence(const string& text, return oss.str(); } -template -StrType -all_as_formatted_evidence(const StrType& text, TagMappingMode tag_mapping_mode) -{ - TaintRangeRefs text_ranges = api_get_ranges(text); - return StrType(as_formatted_evidence(AnyTextObjectToString(text), text_ranges, tag_mapping_mode, nullopt)); -} - -template -StrType -int_as_formatted_evidence(const StrType& text, TaintRangeRefs& text_ranges, TagMappingMode tag_mapping_mode) -{ - return StrType(as_formatted_evidence(AnyTextObjectToString(text), text_ranges, tag_mapping_mode, nullopt)); -} - template StrType api_as_formatted_evidence(const StrType& text, @@ -113,6 +105,10 @@ api_as_formatted_evidence(const StrType& text, const optional& tag_mapping_mode, const optional& new_ranges) { + if (const auto tx_map = Initializer::get_tainting_map(); !tx_map) { + return text; + } + TaintRangeRefs _ranges; if (!text_ranges) { _ranges = api_get_ranges(text); @@ -454,17 +450,6 @@ pyexport_aspect_helpers(py::module& m) "split_result"_a, // cppcheck-suppress assignBoolToPointer "include_separator"_a = false); - m.def("_all_as_formatted_evidence", - &all_as_formatted_evidence, - "text"_a, - "tag_mapping_function"_a = nullopt, - py::return_value_policy::move); - m.def("_int_as_formatted_evidence", - &int_as_formatted_evidence, - "text"_a, - "text_ranges"_a = nullopt, - "tag_mapping_function"_a = nullopt, - py::return_value_policy::move); m.def("as_formatted_evidence", &api_as_formatted_evidence, "text"_a, diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h 
b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h index 5936d2d7f26..9edecd40c00 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h @@ -3,6 +3,7 @@ #include #include +#include "Initializer/Initializer.h" #include "TaintTracking/TaintRange.h" using namespace pybind11::literals; @@ -20,11 +21,24 @@ api_common_replace(const py::str& string_method, template StrType -all_as_formatted_evidence(const StrType& text, TagMappingMode tag_mapping_mode); +all_as_formatted_evidence(const StrType& text, TagMappingMode tag_mapping_mode) +{ + if (const auto tx_map = Initializer::get_tainting_map(); !tx_map) { + return text; + } + TaintRangeRefs text_ranges = api_get_ranges(text); + return StrType(as_formatted_evidence(AnyTextObjectToString(text), text_ranges, tag_mapping_mode, nullopt)); +} template StrType -int_as_formatted_evidence(const StrType& text, TaintRangeRefs& text_ranges, TagMappingMode tag_mapping_mode); +int_as_formatted_evidence(const StrType& text, TaintRangeRefs& text_ranges, TagMappingMode tag_mapping_mode) +{ + if (const auto tx_map = Initializer::get_tainting_map(); !tx_map) { + return text; + } + return StrType(as_formatted_evidence(AnyTextObjectToString(text), text_ranges, tag_mapping_mode, nullopt)); +} string as_formatted_evidence(const string& text, diff --git a/ddtrace/appsec/_iast/_taint_tracking/CMakeLists.txt b/ddtrace/appsec/_iast/_taint_tracking/CMakeLists.txt index f5cece87149..29061150d6d 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/CMakeLists.txt +++ b/ddtrace/appsec/_iast/_taint_tracking/CMakeLists.txt @@ -34,12 +34,6 @@ else () message("Debug mode: not using abseil") endif () -FetchContent_Declare( - googletest - URL https://github.com/google/googletest/archive/refs/tags/release-1.12.1.zip -) -FetchContent_MakeAvailable(googletest) - include_directories(".") file(GLOB SOURCE_FILES "*.cpp" @@ -72,7 +66,7 @@ message(STATUS "Python_EXECUTABLE = 
${Python_EXECUTABLE}") add_subdirectory(_vendor/pybind11) if (NATIVE_TESTING) add_subdirectory(tests EXCLUDE_FROM_ALL) -endif() +endif () pybind11_add_module(_native SHARED ${SOURCE_FILES} ${HEADER_FILES}) get_filename_component(PARENT_DIR ${CMAKE_CURRENT_LIST_DIR} DIRECTORY) diff --git a/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h b/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h index 13389d01767..6ff008337c4 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h +++ b/ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h @@ -47,10 +47,7 @@ set_fast_tainted_if_notinterned_unicode(PyObject* objptr); inline bool is_text(const PyObject* pyptr) { - if (!pyptr) - return false; - - return PyUnicode_Check(pyptr) || PyBytes_Check(pyptr) || PyByteArray_Check(pyptr); + return (pyptr != nullptr) and (PyUnicode_Check(pyptr) or PyBytes_Check(pyptr) or PyByteArray_Check(pyptr)); } inline bool diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp index 94391bd185b..bd88a864088 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp @@ -1,9 +1,12 @@ #include #include +#include #include #include #include +#include +#include namespace py = pybind11; @@ -15,19 +18,36 @@ class PyEnvCheck : public ::testing::Test void TearDown() override { py::finalize_interpreter(); } }; +class PyEnvWithContext : public ::testing::Test +{ + protected: + void SetUp() override + { + initializer = make_unique(); + py::initialize_interpreter(); + initializer->create_context(); + } + + void TearDown() override + { + initializer->reset_context(); + py::finalize_interpreter(); + } +}; + using HasPyErrCheck = PyEnvCheck; TEST_F(HasPyErrCheck, NoErrorReturnsFalse) { EXPECT_FALSE(has_pyerr()); - EXPECT_EQ(has_pyerr_as_string(), ""); + EXPECT_STREQ(has_pyerr_as_string().c_str(), ""); } TEST_F(HasPyErrCheck, 
ErrorReturnsTrue) { PyErr_SetString(PyExc_RuntimeError, "Test error"); EXPECT_TRUE(has_pyerr()); - EXPECT_EQ(has_pyerr_as_string(), "Test error"); + EXPECT_STREQ(has_pyerr_as_string().c_str(), "Test error"); PyErr_Clear(); } @@ -35,12 +55,12 @@ TEST_F(HasPyErrCheck, ClearError) { PyErr_SetString(PyExc_RuntimeError, "Test error"); EXPECT_TRUE(has_pyerr()); - EXPECT_EQ(has_pyerr_as_string(), "Test error"); + EXPECT_STREQ(has_pyerr_as_string().c_str(), "Test error"); // Clear the error PyErr_Clear(); EXPECT_FALSE(has_pyerr()); - EXPECT_EQ(has_pyerr_as_string(), ""); + EXPECT_STREQ(has_pyerr_as_string().c_str(), ""); } using GetTagCheck = ::testing::Test; @@ -49,14 +69,14 @@ TEST_F(GetTagCheck, HandlesEmptyString) { std::string input = ""; std::string expected_output = EVIDENCE_MARKS::BLANK; - EXPECT_EQ(get_tag(input), expected_output); + EXPECT_STREQ(get_tag(input).c_str(), expected_output.c_str()); } TEST_F(GetTagCheck, HandlesNonEmptyString) { std::string input = "example"; std::string expected_output = std::string(EVIDENCE_MARKS::LESS) + "example" + std::string(EVIDENCE_MARKS::GREATER); - EXPECT_EQ(get_tag(input), expected_output); + EXPECT_STREQ(get_tag(input).c_str(), expected_output.c_str()); } TEST_F(GetTagCheck, HandlesSpecialCharacters) @@ -64,7 +84,7 @@ TEST_F(GetTagCheck, HandlesSpecialCharacters) std::string input = "special!@#"; std::string expected_output = std::string(EVIDENCE_MARKS::LESS) + "special!@#" + std::string(EVIDENCE_MARKS::GREATER); - EXPECT_EQ(get_tag(input), expected_output); + EXPECT_STREQ(get_tag(input).c_str(), expected_output.c_str()); } using GetDefaultContentCheck = ::testing::Test; @@ -73,7 +93,7 @@ TEST_F(GetDefaultContentCheck, HandlesEmptySourceName) TaintRangePtr taint_range = std::make_shared(); taint_range->source.name = ""; std::string expected_output = ""; - EXPECT_EQ(get_default_content(taint_range), expected_output); + EXPECT_STREQ(get_default_content(taint_range).c_str(), expected_output.c_str()); } 
TEST_F(GetDefaultContentCheck, HandlesNonEmptySourceName) @@ -81,7 +101,7 @@ TEST_F(GetDefaultContentCheck, HandlesNonEmptySourceName) TaintRangePtr taint_range = std::make_shared(); taint_range->source.name = "example"; std::string expected_output = "example"; - EXPECT_EQ(get_default_content(taint_range), expected_output); + EXPECT_STREQ(get_default_content(taint_range).c_str(), expected_output.c_str()); } TEST_F(GetDefaultContentCheck, HandlesSpecialCharactersInSourceName) @@ -89,7 +109,7 @@ TEST_F(GetDefaultContentCheck, HandlesSpecialCharactersInSourceName) TaintRangePtr taint_range = std::make_shared(); taint_range->source.name = "special!@#"; std::string expected_output = "special!@#"; - EXPECT_EQ(get_default_content(taint_range), expected_output); + EXPECT_STREQ(get_default_content(taint_range).c_str(), expected_output.c_str()); } using MapperReplaceCheck = PyEnvCheck; @@ -97,20 +117,20 @@ using MapperReplaceCheck = PyEnvCheck; TEST_F(MapperReplaceCheck, HandlesNullTaintRange) { optional new_ranges = py::dict(); - EXPECT_EQ(mapper_replace(nullptr, new_ranges), ""); + EXPECT_STREQ(mapper_replace(nullptr, new_ranges).c_str(), ""); } TEST_F(MapperReplaceCheck, HandlesNullNewRanges) { TaintRangePtr taint_range = std::make_shared(); - EXPECT_EQ(mapper_replace(taint_range, nullopt), ""); + EXPECT_STREQ(mapper_replace(taint_range, nullopt).c_str(), ""); } TEST_F(MapperReplaceCheck, HandlesNonExistingRange) { TaintRangePtr taint_range = std::make_shared(); optional new_ranges = py::dict(); - EXPECT_EQ(mapper_replace(taint_range, new_ranges), ""); + EXPECT_STREQ(mapper_replace(taint_range, new_ranges).c_str(), ""); } // FIXME: not working, check with Alberto @@ -129,7 +149,7 @@ TEST_F(MapperReplaceCheck, DISABLED_HandlesExistingRange) py::dict new_ranges; new_ranges[py::cast(taint_range)] = py::cast(new_range); - EXPECT_EQ(mapper_replace(taint_range, new_ranges), std::to_string(new_range->get_hash())); + EXPECT_STREQ(mapper_replace(taint_range, new_ranges).c_str(), 
std::to_string(new_range->get_hash()).c_str()); } using GetNumTest = PyEnvCheck; @@ -167,4 +187,167 @@ TEST_F(GetNumTest, MaxUnsignedLong) std::string max_ulong_str = std::to_string(ULONG_MAX); unsigned long int result = getNum(max_ulong_str); EXPECT_EQ(result, ULONG_MAX); +} + +using AsFormattedEvidenceCheck = PyEnvWithContext; +using AsFormattedEvidenceCheckNoContext = PyEnvCheck; + +TEST_F(AsFormattedEvidenceCheckNoContext, NoTaintMapSameString) +{ + const py::str text("This is a test string."); + Source source("source1", "sample_value", OriginType::BODY); + TaintRangeRefs taint_ranges = { std::make_shared(5, 4, source) }; + const py::str result = as_formatted_evidence(text, taint_ranges); + EXPECT_STREQ(AnyTextObjectToString(result).c_str(), AnyTextObjectToString(text).c_str()); +} + +TEST_F(AsFormattedEvidenceCheck, NoTaintRanges) +{ + std::string text = "This is a test string."; + TaintRangeRefs taint_ranges; // Empty ranges + std::string result = as_formatted_evidence(text, taint_ranges, std::nullopt); + EXPECT_STREQ(result.c_str(), text.c_str()); +} + +TEST_F(AsFormattedEvidenceCheck, SingleTaintRangeWithNoMapper) +{ + const std::string text = "This is a test string."; + Source source("source1", "sample_value", OriginType::BODY); + TaintRangeRefs taint_ranges = { std::make_shared(5, 4, source) }; + const std::string expected_result = "This :+-is a-+: test string."; // Expected tagged output + const std::string result = as_formatted_evidence(text, taint_ranges, nullopt, nullopt); + EXPECT_STREQ(result.c_str(), expected_result.c_str()); +} + +TEST_F(AsFormattedEvidenceCheck, MultipleTaintRangesWithNoMapper) +{ + const std::string text = "This is a test string."; + Source source1("source1", "sample_value1", OriginType::BODY); + Source source2("source2", "sample_value2", OriginType::BODY); + TaintRangeRefs taint_ranges = { std::make_shared(5, 2, source1), + std::make_shared(10, 4, source2) }; + const std::string expected_result = "This :+-is-+: a :+-test-+: 
string."; + const std::string result = as_formatted_evidence(text, taint_ranges, nullopt, nullopt); + EXPECT_STREQ(result.c_str(), expected_result.c_str()); +} + +TEST_F(AsFormattedEvidenceCheck, DefaultTagMappingModeIsMapper) +{ + const std::string text = "This is a test string."; + Source source("source1", "sample_value", OriginType::BODY); + TaintRangeRefs taint_ranges = { std::make_shared(5, 2, source) }; + + const std::string expected_result = "This :+-<3485454368>is<3485454368>-+: a test string."; + const std::string result = as_formatted_evidence(text, taint_ranges); + EXPECT_STREQ(result.c_str(), expected_result.c_str()); +} + +TEST_F(AsFormattedEvidenceCheck, MultipleRangesWithMapper) +{ + const std::string text = "This is a test string."; + Source source1("source1", "sample_value", OriginType::BODY); + Source source2("source2", "sample_value", OriginType::PARAMETER); + TaintRangeRefs taint_ranges = { + std::make_shared(5, 2, source1), + std::make_shared(10, 4, source2), + }; + + const std::string expected_result = + "This :+-<3485454368>is<3485454368>-+: a :+-<891889858>test<891889858>-+: string."; + const std::string result = as_formatted_evidence(text, taint_ranges); + EXPECT_STREQ(result.c_str(), expected_result.c_str()); +} + +// FIXME: same problem as mapper_replace test above +TEST_F(AsFormattedEvidenceCheck, DISABLED_SingleTaintRangeWithMapperReplace) +{ + const std::string text = "This is a test string."; + Source source("source1", "sample_value", OriginType::BODY); + TaintRangeRefs taint_ranges = { std::make_shared(5, 2, source) }; + + py::dict new_ranges; + TaintRange new_range(5, 2, Source("new_source", "sample_value", OriginType::BODY)); + new_ranges[py::cast(taint_ranges[0])] = new_range; + + const std::string expected_result = "This :+-is-+: a test string."; + const std::string result = as_formatted_evidence(text, taint_ranges, TagMappingMode::Mapper_Replace, new_ranges); + EXPECT_STREQ(result.c_str(), expected_result.c_str()); +} + 
+TEST_F(AsFormattedEvidenceCheck, EmptyTextWithTaintRanges) +{ + const std::string text; + Source source("source1", "sample_value", OriginType::BODY); + TaintRangeRefs taint_ranges = { std::make_shared(0, 1, source) }; + const std::string expected_result; + const std::string result = as_formatted_evidence(text, taint_ranges, nullopt, nullopt); + EXPECT_STREQ(result.c_str(), expected_result.c_str()); +} + +using AllAsFormattedEvidenceCheck = PyEnvWithContext; +using AllAsFormattedEvidenceCheckNoContext = PyEnvCheck; + +TEST_F(AllAsFormattedEvidenceCheckNoContext, NoTaintMapSameString) +{ + const py::str text("This is a test string."); + const py::str result = all_as_formatted_evidence(text, TagMappingMode::Mapper); + EXPECT_STREQ(AnyTextObjectToString(result).c_str(), AnyTextObjectToString(text).c_str()); +} + +TEST_F(AllAsFormattedEvidenceCheck, NoRangesSameString) +{ + const py::str text("This is a test string."); + const py::str result = all_as_formatted_evidence(text, TagMappingMode::Mapper); + EXPECT_STREQ(AnyTextObjectToString(result).c_str(), AnyTextObjectToString(text).c_str()); +} + +TEST_F(AllAsFormattedEvidenceCheck, SingleTaintRangeWithNormalMapper) +{ + py::str text("This is a test string."); + Source source("source1", "sample_value", OriginType::BODY); + TaintRangeRefs taint_ranges = { std::make_shared(5, 2, source) }; + api_set_ranges(text, taint_ranges); + + const py::str expected_result("This :+-is-+: a test string."); + const py::str result = all_as_formatted_evidence(text, TagMappingMode::Normal); + + EXPECT_STREQ(AnyTextObjectToString(result).c_str(), AnyTextObjectToString(expected_result).c_str()); +} + +TEST_F(AllAsFormattedEvidenceCheck, SingleTaintRangeWithMapper) +{ + py::str text("This is a test string."); + Source source("source1", "sample_value", OriginType::BODY); + TaintRangeRefs taint_ranges = { std::make_shared(5, 2, source) }; + api_set_ranges(text, taint_ranges); + + const py::str expected_result("This 
:+-<3485454368>is<3485454368>-+: a test string."); + const py::str result = all_as_formatted_evidence(text, TagMappingMode::Mapper); + + EXPECT_STREQ(AnyTextObjectToString(result).c_str(), AnyTextObjectToString(expected_result).c_str()); +} + +// See above +TEST_F(AllAsFormattedEvidenceCheck, DISABLED_SingleTaintRangeWithMapperReplace) +{ + py::str text("This is a test string."); + Source source("source1", "sample_value", OriginType::BODY); + TaintRangeRefs taint_ranges = { std::make_shared(5, 2, source) }; + api_set_ranges(text, taint_ranges); + + py::dict new_ranges; + TaintRange new_range(5, 2, Source("new_source", "sample_value", OriginType::BODY)); + new_ranges[py::cast(taint_ranges[0])] = new_range; + + const py::str expected_result("This :+-is-+: a test string."); + const py::str result = all_as_formatted_evidence(text, TagMappingMode::Mapper_Replace); + + EXPECT_STREQ(AnyTextObjectToString(result).c_str(), AnyTextObjectToString(expected_result).c_str()); +} + +TEST_F(AllAsFormattedEvidenceCheck, EmptyText) +{ + const py::str text(""); + const py::str result = all_as_formatted_evidence(text, TagMappingMode::Mapper); + EXPECT_STREQ(AnyTextObjectToString(result).c_str(), AnyTextObjectToString(text).c_str()); } \ No newline at end of file diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_stringutils.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_stringutils.cpp index 0e15f9515d6..92a30f2e0f2 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/test_stringutils.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_stringutils.cpp @@ -293,20 +293,20 @@ using PyObjectToStringCheck = PyEnvCheck; TEST_F(PyObjectToStringCheck, NullptrReturnsEmptyString) { - EXPECT_EQ(PyObjectToString(nullptr), string("")); + EXPECT_STREQ(PyObjectToString(nullptr).c_str(), ""); } TEST_F(PyObjectToStringCheck, UnicodeReturnsCorrectString) { PyObject* unicode_obj = PyUnicode_FromString("test"); - EXPECT_EQ(PyObjectToString(unicode_obj), string("test")); + 
EXPECT_STREQ(PyObjectToString(unicode_obj).c_str(), "test"); Py_DECREF(unicode_obj); } TEST_F(PyObjectToStringCheck, NonUnicodeReturnsEmptyString) { PyObject* non_unicode_obj = PyLong_FromLong(42); - EXPECT_EQ(PyObjectToString(non_unicode_obj), string("")); + EXPECT_STREQ(PyObjectToString(non_unicode_obj).c_str(), ""); Py_DECREF(non_unicode_obj); } @@ -317,7 +317,7 @@ TEST_F(StringToPyObjectCheck, ConvertsToUnicode) std::string test_str = "test"; py::object py_obj = StringToPyObject(test_str, PyTextType::UNICODE); EXPECT_TRUE(PyUnicode_Check(py_obj.ptr())); - EXPECT_EQ(PyUnicode_AsUTF8(py_obj.ptr()), test_str); + EXPECT_STREQ(PyUnicode_AsUTF8(py_obj.ptr()), test_str.c_str()); } TEST_F(StringToPyObjectCheck, ConvertsToBytes) @@ -325,7 +325,7 @@ TEST_F(StringToPyObjectCheck, ConvertsToBytes) std::string test_str = "test"; py::object py_obj = StringToPyObject(test_str, PyTextType::BYTES); EXPECT_TRUE(PyBytes_Check(py_obj.ptr())); - EXPECT_EQ(PyBytes_AsString(py_obj.ptr()), test_str); + EXPECT_STREQ(PyBytes_AsString(py_obj.ptr()), test_str.c_str()); } TEST_F(StringToPyObjectCheck, ConvertsToByteArray) @@ -333,7 +333,7 @@ TEST_F(StringToPyObjectCheck, ConvertsToByteArray) std::string test_str = "test"; py::object py_obj = StringToPyObject(test_str, PyTextType::BYTEARRAY); EXPECT_TRUE(PyByteArray_Check(py_obj.ptr())); - EXPECT_EQ(PyByteArray_AsString(py_obj.ptr()), test_str); + EXPECT_STREQ(PyByteArray_AsString(py_obj.ptr()), test_str.c_str()); } TEST_F(StringToPyObjectCheck, InvalidTypeReturnsNone) @@ -348,25 +348,25 @@ using AnyTextObjectToStringCheck = PyEnvCheck; TEST_F(AnyTextObjectToStringCheck, UnicodeReturnsCorrectString) { auto unicode_obj = py::str("test"); - EXPECT_EQ(AnyTextObjectToString(unicode_obj), string("test")); + EXPECT_STREQ(AnyTextObjectToString(unicode_obj).c_str(), "test"); } TEST_F(AnyTextObjectToStringCheck, BytesReturnsCorrectString) { auto bytes_obj = py::bytes("test"); - EXPECT_EQ(AnyTextObjectToString(bytes_obj), string("test")); + 
EXPECT_STREQ(AnyTextObjectToString(bytes_obj).c_str(), "test"); } TEST_F(AnyTextObjectToStringCheck, ByteArrayReturnsCorrectString) { auto bytearray_obj = py::bytearray("test"); - EXPECT_EQ(AnyTextObjectToString(bytearray_obj), string("test")); + EXPECT_STREQ(AnyTextObjectToString(bytearray_obj).c_str(), "test"); } TEST_F(AnyTextObjectToStringCheck, NonTextReturnsEmptyString) { auto non_text_obj = py::int_(42); - EXPECT_EQ(AnyTextObjectToString(non_text_obj), string("")); + EXPECT_STREQ(AnyTextObjectToString(non_text_obj).c_str(), ""); } using PyObjectToPyTextCheck = PyEnvCheck; From 4a380b724a40a7d390867ef6f72c973ac44a15e3 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Wed, 11 Sep 2024 13:29:59 +0200 Subject: [PATCH 08/24] Added remaining tests for Helper.cpp Signed-off-by: Juanjo Alvarez --- .../_iast/_taint_tracking/Aspects/Helpers.cpp | 31 -- .../_iast/_taint_tracking/Aspects/Helpers.h | 70 ++++ .../_taint_tracking/tests/test_helpers.cpp | 348 ++++++++++++++++++ 3 files changed, 418 insertions(+), 31 deletions(-) diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp index 7f478780278..da747daa8b9 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp @@ -2,7 +2,6 @@ #include "Initializer/Initializer.h" #include #include -#include using namespace pybind11::literals; namespace py = pybind11; @@ -118,20 +117,6 @@ api_as_formatted_evidence(const StrType& text, return StrType(as_formatted_evidence(AnyTextObjectToString(text), _ranges, tag_mapping_mode, new_ranges)); } -vector -split_taints(const string& str_to_split) -{ - const std::regex rgx(R"((:\+-(<[0-9.a-z\-]+>)?|(<[0-9.a-z\-]+>)?-\+:))"); - std::sregex_token_iterator iter(str_to_split.begin(), str_to_split.end(), rgx, { -1, 0 }); - vector res; - - for (const std::sregex_token_iterator end; iter != end; ++iter) { - res.push_back(*iter); - } - - return res; 
-} - py::bytearray api_convert_escaped_text_to_taint_text(const py::bytearray& taint_escaped_text, const TaintRangeRefs& ranges_orig) { @@ -369,22 +354,6 @@ api_set_ranges_on_splitted(const StrType& source_str, return set_ranges_on_splitted(source_str, source_ranges, split_result, tx_map, include_separator); } -py::object -parse_params(size_t position, - const char* keyword_name, - const py::object& default_value, - const py::args& args, - const py::kwargs& kwargs) -{ - if (args.size() >= position + 1) { - return args[position]; - } - if (kwargs && kwargs.contains(keyword_name)) { - return kwargs[keyword_name]; - } - return default_value; -} - bool has_pyerr() { diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h index 9edecd40c00..84cfd09c733 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h @@ -2,6 +2,7 @@ #include #include +#include #include "Initializer/Initializer.h" #include "TaintTracking/TaintRange.h" @@ -206,6 +207,75 @@ process_flag_added_args(PyObject* orig_function, const int flag_added_args, PyOb return args; } +/** + * @brief Splits a string containing taint markers into its textual components and the markers. + * + * This function takes a string that contains special taint markers (e.g., `:+-<...>-+:`) and splits it + * into separate components: the plain text parts and the taint markers. The markers represent taint information + * surrounding sections of the string, and the result is a vector where both text and markers are included as separate + * elements. + * + * @param str_to_split The input string containing taint markers. + * + * @return A vector of strings where each element is either a part of the original text or a taint marker. 
+ * + * @example + * std::string tainted_str = "This :+-<123>-+:is a :+-<456>-+:test."; + * std::vector result = split_taints(tainted_str); + * // result will be: ["This ", ":+-<123>-+:", "is a ", ":+-<456>-+:", "test."] + */ +inline vector +split_taints(const string& str_to_split) +{ + const std::regex rgx(R"((:\+-(<[0-9.a-z\-]+>)?|(<[0-9.a-z\-]+>)?-\+:))"); + std::sregex_token_iterator iter(str_to_split.begin(), str_to_split.end(), rgx, { -1, 0 }); + vector res; + + for (const std::sregex_token_iterator end; iter != end; ++iter) { + res.push_back(*iter); + } + + return res; +} + +/** + * @brief Retrieves a parameter from either the positional arguments, keyword arguments, or returns a default value. + * + * This function checks if a value is provided in the positional arguments (`args`) at the specified position. + * If not found, it checks the keyword arguments (`kwargs`) for the specified key. If neither is found, + * it returns the default value provided. + * + * @param position The position in the positional arguments (`args`) to check. + * @param keyword_name The name of the keyword to search for in the keyword arguments (`kwargs`). + * @param default_value The default value to return if the argument is not found in either `args` or `kwargs`. + * @param args The list of positional arguments. + * @param kwargs The dictionary of keyword arguments. + * + * @return The parameter found in the positional arguments, keyword arguments, or the default value if none is found. + * + * @example + * py::args args = py::make_tuple(42); + * py::kwargs kwargs; + * py::object default_value = py::int_(0); + * py::object result = parse_params(0, "key", default_value, args, kwargs); + * // In this case, the result will be 42 (the positional argument). 
+ */ +inline py::object +parse_params(size_t position, + const char* keyword_name, + const py::object& default_value, + const py::args& args, + const py::kwargs& kwargs) +{ + if (args.size() >= position + 1) { + return args[position]; + } + if (kwargs && kwargs.contains(keyword_name)) { + return kwargs[keyword_name]; + } + return default_value; +} + void pyexport_aspect_helpers(py::module& m); diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp index bd88a864088..bc5bc10b8ab 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp @@ -350,4 +350,352 @@ TEST_F(AllAsFormattedEvidenceCheck, EmptyText) const py::str text(""); const py::str result = all_as_formatted_evidence(text, TagMappingMode::Mapper); EXPECT_STREQ(AnyTextObjectToString(result).c_str(), AnyTextObjectToString(text).c_str()); +} + +using ParseParamsCheck = PyEnvCheck; + +TEST_F(ParseParamsCheck, PositionalArgumentPresent) +{ + py::args args = py::make_tuple(42); + py::kwargs kwargs; + py::object default_value = py::int_(0); + + py::object result = parse_params(0, "key", default_value, args, kwargs); + EXPECT_EQ(result.cast(), 42); +} + +TEST_F(ParseParamsCheck, KeywordArgumentPresent) +{ + py::args args; + py::kwargs kwargs; + kwargs["key"] = py::int_(42); + py::object default_value = py::int_(0); + + py::object result = parse_params(0, "key", default_value, args, kwargs); + EXPECT_EQ(result.cast(), 42); +} + +TEST_F(ParseParamsCheck, NoArgumentUsesDefault) +{ + py::args args; + py::kwargs kwargs; + py::object default_value = py::int_(42); + + py::object result = parse_params(0, "key", default_value, args, kwargs); + EXPECT_EQ(result.cast(), 42); +} + +TEST_F(ParseParamsCheck, PositionalOverridesKeyword) +{ + py::args args = py::make_tuple(100); + py::kwargs kwargs; + kwargs["key"] = py::int_(42); + py::object default_value = py::int_(0); + 
+ py::object result = parse_params(0, "key", default_value, args, kwargs); + EXPECT_EQ(result.cast(), 100); +} + +TEST_F(ParseParamsCheck, HandlesMissingKeyword) +{ + py::args args; + py::kwargs kwargs; + py::object default_value = py::str("default_value"); + + py::object result = parse_params(0, "missing_key", default_value, args, kwargs); + EXPECT_STREQ(result.cast().c_str(), "default_value"); +} + +TEST(SplitTaints, EmptyString) +{ + std::string input = ""; + std::vector expected_output = { "" }; + std::vector result = split_taints(input); + EXPECT_EQ(result, expected_output); +} + +TEST(SplitTaints, NoTaintsInString) +{ + std::string input = "This is a regular string."; + std::vector expected_output = { "This is a regular string." }; + std::vector result = split_taints(input); + EXPECT_EQ(result, expected_output); +} + +TEST(SplitTaints, SingleTaintInString) +{ + std::string input = "This is a :+-test-+: string."; + std::vector expected_output = { "This is a ", ":+-", "test", "-+:", " string." }; + std::vector result = split_taints(input); + EXPECT_EQ(result, expected_output); +} + +TEST(SplitTaints, MultipleTaintsInString) +{ + std::string input = "This :+-is-+: a :+-test-+: string."; + std::vector expected_output = { "This ", ":+-", "is", "-+:", " a ", + ":+-", "test", "-+:", " string." }; + std::vector result = split_taints(input); + EXPECT_EQ(result, expected_output); +} + +TEST(SplitTaints, TaintsAtStartAndEnd) +{ + std::string input = ":+-Start-+: and :+-End-+:"; + std::vector expected_output = { "", ":+-", "Start", "-+:", + " and ", ":+-", "End", "-+:" }; + std::vector result = split_taints(input); + EXPECT_EQ(result, expected_output); +} + +TEST(SplitTaints, ConsecutiveTaints) +{ + std::string input = "Text :+-taint1-+: :+-taint2-+: after."; + std::vector expected_output = { "Text ", ":+-", "taint1", "-+:", " ", + ":+-", "taint2", "-+:", " after." 
}; + std::vector result = split_taints(input); + EXPECT_EQ(result, expected_output); +} + +using SetRangesOnSplittedCheck = PyEnvWithContext; + +TEST_F(SetRangesOnSplittedCheck, EmptySourceAndSplit) +{ + py::str source_str = ""; + py::list split_result; + TaintRangeRefs source_ranges; + auto tx_map = Initializer::get_tainting_map(); + bool result = set_ranges_on_splitted(source_str, source_ranges, split_result, tx_map, false); + EXPECT_FALSE(result); + + for (const auto& item : split_result) { + EXPECT_STREQ(AnyTextObjectToString(item.cast()).c_str(), ""); + auto ranges = get_ranges(item.ptr(), tx_map); + EXPECT_TRUE(ranges.first.empty()); + } +} + +TEST_F(SetRangesOnSplittedCheck, SingleSplitWithoutSeparator) +{ + py::str source_str = "This is a test string."; + py::list split_result; + split_result.append(py::str("This")); + split_result.append(py::str("is a test string.")); + + Source source("source1", "sample_value", OriginType::BODY); + TaintRangeRefs source_ranges = { std::make_shared(0, 4, source) }; + api_set_ranges(source_str, source_ranges); + auto tx_map = Initializer::get_tainting_map(); + bool result = set_ranges_on_splitted(source_str, source_ranges, split_result, tx_map, false); + EXPECT_TRUE(result); + + auto first = split_result[0]; + auto first_ranges = get_ranges(first.ptr(), tx_map); + EXPECT_EQ(first_ranges.first.size(), 1); + EXPECT_EQ(first_ranges.first[0]->start, 0); + EXPECT_EQ(first_ranges.first[0]->length, 4); + + auto last = split_result[1]; + auto last_ranges = get_ranges(last.ptr(), tx_map); + EXPECT_TRUE(last_ranges.first.empty()); +} + +TEST_F(SetRangesOnSplittedCheck, MultipleSplitsNoSeparator) +{ + py::str source_str = "This is a test string."; + py::list split_result; + split_result.append(py::str("This")); + split_result.append(py::str("is")); + split_result.append(py::str("a")); + split_result.append(py::str("test")); + split_result.append(py::str("string.")); + + Source source1("source1", "sample_value1", OriginType::BODY); + 
Source source2("source2", "sample_value2", OriginType::BODY); + TaintRangeRefs source_ranges = { + std::make_shared(0, 4, source1), // Taint "This" + std::make_shared(10, 4, source2) // Taint "test" + }; + api_set_ranges(source_str, source_ranges); + auto tx_map = Initializer::get_tainting_map(); + + bool result = set_ranges_on_splitted(source_str, source_ranges, split_result, tx_map, false); + EXPECT_TRUE(result); + + // Check first split part "This" + auto first = split_result[0]; + auto first_ranges = get_ranges(first.ptr(), tx_map); + EXPECT_EQ(first_ranges.first.size(), 1); + EXPECT_EQ(first_ranges.first[0]->start, 0); + EXPECT_EQ(first_ranges.first[0]->length, 4); + + // Check middle split part "test" + auto test_part = split_result[3]; + auto test_ranges = get_ranges(test_part.ptr(), tx_map); + EXPECT_EQ(test_ranges.first.size(), 1); + EXPECT_EQ(test_ranges.first[0]->start, 0); // Position within "test" + EXPECT_EQ(test_ranges.first[0]->length, 4); + + // Check that other parts have no ranges + for (int i : { 1, 2, 4 }) { + auto part = split_result[i]; + auto part_ranges = get_ranges(part.ptr(), tx_map); + EXPECT_TRUE(part_ranges.first.empty()); + } +} + +// JJJ bug +TEST_F(SetRangesOnSplittedCheck, SplitWithSeparatorIncluded) +{ + py::str source_str = "This|is|a|test|string."; + py::list split_result; + split_result.append(py::str("This")); + split_result.append(py::str("is")); + split_result.append(py::str("a")); + split_result.append(py::str("test")); + split_result.append(py::str("string.")); + + Source source1("source1", "sample_value1", OriginType::BODY); + Source source2("source2", "sample_value2", OriginType::BODY); + TaintRangeRefs source_ranges = { + std::make_shared(0, 4, source1), // Taint "This" + std::make_shared(10, 4, source2) // Taint "test" + }; + api_set_ranges(source_str, source_ranges); + auto tx_map = Initializer::get_tainting_map(); + + bool result = set_ranges_on_splitted(source_str, source_ranges, split_result, tx_map, true); + 
EXPECT_TRUE(result); + + // Check first split part "This" + auto first = split_result[0]; + auto first_ranges = get_ranges(first.ptr(), tx_map); + EXPECT_EQ(first_ranges.first.size(), 1); + EXPECT_EQ(first_ranges.first[0]->start, 0); + EXPECT_EQ(first_ranges.first[0]->length, 4); + + // Check middle split part "test" + auto test_part = split_result[3]; + auto test_ranges = get_ranges(test_part.ptr(), tx_map); + EXPECT_EQ(test_ranges.first.size(), 1); + EXPECT_EQ(test_ranges.first[0]->start, 0); // Position within "test" + EXPECT_EQ(test_ranges.first[0]->length, 4); + + // Check that other parts have no ranges + for (int i : { 1, 2, 4 }) { + auto part = split_result[i]; + auto part_ranges = get_ranges(part.ptr(), tx_map); + EXPECT_TRUE(part_ranges.first.empty()); + } +} + +TEST_F(SetRangesOnSplittedCheck, EmptyRanges) +{ + py::str source_str = "This is a test string."; + py::list split_result; + split_result.append(py::str("This")); + split_result.append(py::str("is a test string.")); + + TaintRangeRefs source_ranges; // Empty ranges + auto tx_map = Initializer::get_tainting_map(); + + bool result = set_ranges_on_splitted(source_str, source_ranges, split_result, tx_map, false); + EXPECT_FALSE(result); + + // Check that no ranges are applied to the split result + for (const auto& item : split_result) { + auto item_ranges = get_ranges(item.ptr(), tx_map); + EXPECT_TRUE(item_ranges.first.empty()); + } +} + +using ProcessFlagAddedArgsTest = PyEnvCheck; + +TEST_F(ProcessFlagAddedArgsTest, NoAddedArgsOriginalNone) +{ + PyObject* orig_function = Py_None; + int flag_added_args = 0; + py::tuple args = py::make_tuple("arg1", "arg2"); + py::dict kwargs; + + PyObject* result = process_flag_added_args(orig_function, flag_added_args, args.ptr(), kwargs.ptr()); + + // Should return args as no slicing is required + EXPECT_EQ(result, args.ptr()); +} + +// Test with added args, original function is None +TEST_F(ProcessFlagAddedArgsTest, AddedArgsOriginalNone) +{ + PyObject* 
orig_function = Py_None; + int flag_added_args = 1; + py::tuple args = py::make_tuple("arg1", "arg2", "added_arg"); + py::dict kwargs; + + PyObject* result = process_flag_added_args(orig_function, flag_added_args, args.ptr(), kwargs.ptr()); + + // Should return the full argument list since no slicing is needed + EXPECT_EQ(result, args.ptr()); +} +// Test with added args, original function is custom +TEST_F(ProcessFlagAddedArgsTest, AddedArgsOriginalCustomFunction) +{ + PyObject* orig_function = Py_None; + py::object custom_function = py::cpp_function([](py::tuple args, py::dict kwargs) { return args[0]; }); + orig_function = custom_function.ptr(); + + int flag_added_args = 1; + py::tuple args = py::make_tuple("arg1", "arg2", "added_arg"); + py::dict kwargs; + + PyObject* result = process_flag_added_args(orig_function, flag_added_args, args.ptr(), kwargs.ptr()); + + // Only "arg1" and "arg2" should be passed to the original function (sliced) + py::tuple expected_result = py::make_tuple("arg1", "arg2"); + EXPECT_EQ(py::reinterpret_borrow(result), expected_result); +} + +// Test with added args, original function is built-in +TEST_F(ProcessFlagAddedArgsTest, AddedArgsOriginalBuiltinFunction) +{ + PyObject* orig_function = (PyObject*)&PyUnicode_Type; // Use the built-in unicode function as an example + int flag_added_args = 1; + py::tuple args = py::make_tuple("arg1", "arg2", "added_arg"); + py::dict kwargs; + + PyObject* result = process_flag_added_args(orig_function, flag_added_args, args.ptr(), kwargs.ptr()); + + // Should return the full argument list because built-in functions are exempt + EXPECT_EQ(result, args.ptr()); +} + +// Test with no added args, original function is custom +TEST_F(ProcessFlagAddedArgsTest, NoAddedArgsOriginalCustomFunction) +{ + py::object custom_function = py::cpp_function([](py::tuple args, py::dict kwargs) { return args[0]; }); + PyObject* orig_function = custom_function.ptr(); + + int flag_added_args = 0; + py::tuple args = 
py::make_tuple("arg1", "arg2"); + py::dict kwargs; + + PyObject* result = process_flag_added_args(orig_function, flag_added_args, args.ptr(), kwargs.ptr()); + + // Since there are no added args, all args should be passed to the custom function + py::tuple expected_result = py::make_tuple("arg1", "arg2"); + EXPECT_EQ(py::reinterpret_borrow(result), expected_result); +} + +// Test with built-in str function and added args +TEST_F(ProcessFlagAddedArgsTest, BuiltInFunctionWithAddedArgs) +{ + PyObject* orig_function = (PyObject*)&PyUnicode_Type; // Use the built-in str function + int flag_added_args = 1; + py::tuple args = py::make_tuple("arg1", "arg2", "added_arg"); + py::dict kwargs; + + PyObject* result = process_flag_added_args(orig_function, flag_added_args, args.ptr(), kwargs.ptr()); + + // Since built-in functions are exempt, it should return the full argument list + EXPECT_EQ(result, args.ptr()); } \ No newline at end of file From 49ce46445e1f1f28b0b51880ecea89425f2f13ae Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Wed, 11 Sep 2024 13:48:02 +0200 Subject: [PATCH 09/24] checkpoint Signed-off-by: Juanjo Alvarez --- .../_iast/_taint_tracking/Aspects/Helpers.h | 1 - .../_taint_tracking/tests/test_helpers.cpp | 177 +++++++++--------- 2 files changed, 91 insertions(+), 87 deletions(-) diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h index 84cfd09c733..9341cd24b0e 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h @@ -176,7 +176,6 @@ inline PyObject* process_flag_added_args(PyObject* orig_function, const int flag_added_args, PyObject* args, PyObject* kwargs) { // If orig_function is not None and not the built-in str, bytes, or bytearray, slice args - if (const auto orig_function_type = Py_TYPE(orig_function); orig_function != Py_None && orig_function_type != &PyUnicode_Type && orig_function_type != 
&PyByteArray_Type && orig_function_type != &PyBytes_Type) { diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp index bc5bc10b8ab..20b031b293b 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp @@ -545,49 +545,49 @@ TEST_F(SetRangesOnSplittedCheck, MultipleSplitsNoSeparator) } // JJJ bug -TEST_F(SetRangesOnSplittedCheck, SplitWithSeparatorIncluded) -{ - py::str source_str = "This|is|a|test|string."; - py::list split_result; - split_result.append(py::str("This")); - split_result.append(py::str("is")); - split_result.append(py::str("a")); - split_result.append(py::str("test")); - split_result.append(py::str("string.")); - - Source source1("source1", "sample_value1", OriginType::BODY); - Source source2("source2", "sample_value2", OriginType::BODY); - TaintRangeRefs source_ranges = { - std::make_shared(0, 4, source1), // Taint "This" - std::make_shared(10, 4, source2) // Taint "test" - }; - api_set_ranges(source_str, source_ranges); - auto tx_map = Initializer::get_tainting_map(); - - bool result = set_ranges_on_splitted(source_str, source_ranges, split_result, tx_map, true); - EXPECT_TRUE(result); - - // Check first split part "This" - auto first = split_result[0]; - auto first_ranges = get_ranges(first.ptr(), tx_map); - EXPECT_EQ(first_ranges.first.size(), 1); - EXPECT_EQ(first_ranges.first[0]->start, 0); - EXPECT_EQ(first_ranges.first[0]->length, 4); - - // Check middle split part "test" - auto test_part = split_result[3]; - auto test_ranges = get_ranges(test_part.ptr(), tx_map); - EXPECT_EQ(test_ranges.first.size(), 1); - EXPECT_EQ(test_ranges.first[0]->start, 0); // Position within "test" - EXPECT_EQ(test_ranges.first[0]->length, 4); - - // Check that other parts have no ranges - for (int i : { 1, 2, 4 }) { - auto part = split_result[i]; - auto part_ranges = get_ranges(part.ptr(), tx_map); - 
EXPECT_TRUE(part_ranges.first.empty()); - } -} +// TEST_F(SetRangesOnSplittedCheck, SplitWithSeparatorIncluded) +// { +// py::str source_str = "This|is|a|test|string."; +// py::list split_result; +// split_result.append(py::str("This")); +// split_result.append(py::str("is")); +// split_result.append(py::str("a")); +// split_result.append(py::str("test")); +// split_result.append(py::str("string.")); +// +// Source source1("source1", "sample_value1", OriginType::BODY); +// Source source2("source2", "sample_value2", OriginType::BODY); +// TaintRangeRefs source_ranges = { +// std::make_shared(0, 4, source1), // Taint "This" +// std::make_shared(10, 4, source2) // Taint "test" +// }; +// api_set_ranges(source_str, source_ranges); +// auto tx_map = Initializer::get_tainting_map(); +// +// bool result = set_ranges_on_splitted(source_str, source_ranges, split_result, tx_map, true); +// EXPECT_TRUE(result); +// +// // Check first split part "This" +// auto first = split_result[0]; +// auto first_ranges = get_ranges(first.ptr(), tx_map); +// EXPECT_EQ(first_ranges.first.size(), 1); +// EXPECT_EQ(first_ranges.first[0]->start, 0); +// EXPECT_EQ(first_ranges.first[0]->length, 4); +// +// // Check middle split part "test" +// auto test_part = split_result[3]; +// auto test_ranges = get_ranges(test_part.ptr(), tx_map); +// EXPECT_EQ(test_ranges.first.size(), 1); +// EXPECT_EQ(test_ranges.first[0]->start, 0); // Position within "test" +// EXPECT_EQ(test_ranges.first[0]->length, 4); +// +// // Check that other parts have no ranges +// for (int i : { 1, 2, 4 }) { +// auto part = split_result[i]; +// auto part_ranges = get_ranges(part.ptr(), tx_map); +// EXPECT_TRUE(part_ranges.first.empty()); +// } +// } TEST_F(SetRangesOnSplittedCheck, EmptyRanges) { @@ -637,55 +637,60 @@ TEST_F(ProcessFlagAddedArgsTest, AddedArgsOriginalNone) // Should return the full argument list since no slicing is needed EXPECT_EQ(result, args.ptr()); } -// Test with added args, original function is custom 
-TEST_F(ProcessFlagAddedArgsTest, AddedArgsOriginalCustomFunction) -{ - PyObject* orig_function = Py_None; - py::object custom_function = py::cpp_function([](py::tuple args, py::dict kwargs) { return args[0]; }); - orig_function = custom_function.ptr(); - - int flag_added_args = 1; - py::tuple args = py::make_tuple("arg1", "arg2", "added_arg"); - py::dict kwargs; - PyObject* result = process_flag_added_args(orig_function, flag_added_args, args.ptr(), kwargs.ptr()); - - // Only "arg1" and "arg2" should be passed to the original function (sliced) - py::tuple expected_result = py::make_tuple("arg1", "arg2"); - EXPECT_EQ(py::reinterpret_borrow(result), expected_result); -} +// JJJ bug +// Test with added args, original function is custom +// TEST_F(ProcessFlagAddedArgsTest, AddedArgsOriginalCustomFunction) +// { +// PyObject* orig_function = Py_None; +// py::object custom_function = py::cpp_function([](py::tuple args, py::dict kwargs) { return args[0]; }); +// orig_function = custom_function.ptr(); +// +// int flag_added_args = 1; +// py::tuple args = py::make_tuple("arg1", "arg2", "added_arg"); +// py::dict kwargs; +// +// PyObject* result = process_flag_added_args(orig_function, flag_added_args, args.ptr(), kwargs.ptr()); +// +// // Only "arg1" and "arg2" should be passed to the original function (sliced) +// py::tuple expected_result = py::make_tuple("arg1", "arg2"); +// EXPECT_EQ(py::reinterpret_borrow(result), expected_result); +// } // Test with added args, original function is built-in -TEST_F(ProcessFlagAddedArgsTest, AddedArgsOriginalBuiltinFunction) -{ - PyObject* orig_function = (PyObject*)&PyUnicode_Type; // Use the built-in unicode function as an example - int flag_added_args = 1; - py::tuple args = py::make_tuple("arg1", "arg2", "added_arg"); - py::dict kwargs; - - PyObject* result = process_flag_added_args(orig_function, flag_added_args, args.ptr(), kwargs.ptr()); - - // Should return the full argument list because built-in functions are exempt - 
EXPECT_EQ(result, args.ptr()); -} +// JJJ +// TEST_F(ProcessFlagAddedArgsTest, AddedArgsOriginalBuiltinFunction) +// { +// PyObject* orig_function = (PyObject*)&PyUnicode_Type; // Use the built-in unicode function as an example +// int flag_added_args = 1; +// py::tuple args = py::make_tuple("arg1", "arg2", "added_arg"); +// py::dict kwargs; +// +// PyObject* result = process_flag_added_args(orig_function, flag_added_args, args.ptr(), kwargs.ptr()); +// +// // Should return the full argument list because built-in functions are exempt +// EXPECT_EQ(result, args.ptr()); +// } +// JJJ bug // Test with no added args, original function is custom -TEST_F(ProcessFlagAddedArgsTest, NoAddedArgsOriginalCustomFunction) -{ - py::object custom_function = py::cpp_function([](py::tuple args, py::dict kwargs) { return args[0]; }); - PyObject* orig_function = custom_function.ptr(); - - int flag_added_args = 0; - py::tuple args = py::make_tuple("arg1", "arg2"); - py::dict kwargs; - - PyObject* result = process_flag_added_args(orig_function, flag_added_args, args.ptr(), kwargs.ptr()); - - // Since there are no added args, all args should be passed to the custom function - py::tuple expected_result = py::make_tuple("arg1", "arg2"); - EXPECT_EQ(py::reinterpret_borrow(result), expected_result); -} +// TEST_F(ProcessFlagAddedArgsTest, NoAddedArgsOriginalCustomFunction) +// { +// py::object custom_function = py::cpp_function([](py::tuple args, py::dict kwargs) { return args[0]; }); +// PyObject* orig_function = custom_function.ptr(); +// +// int flag_added_args = 0; +// py::tuple args = py::make_tuple("arg1", "arg2"); +// py::dict kwargs; +// +// PyObject* result = process_flag_added_args(orig_function, flag_added_args, args.ptr(), kwargs.ptr()); +// +// // Since there are no added args, all args should be passed to the custom function +// py::tuple expected_result = py::make_tuple("arg1", "arg2"); +// EXPECT_EQ(py::reinterpret_borrow(result), expected_result); +// } +// JJJ bug // Test 
with built-in str function and added args TEST_F(ProcessFlagAddedArgsTest, BuiltInFunctionWithAddedArgs) { From d86db9315e659ebda12af40fec1d81701f38f851 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Wed, 11 Sep 2024 16:17:11 +0200 Subject: [PATCH 10/24] Fix issue on set_ranges_on_splitted Signed-off-by: Juanjo Alvarez --- .../_iast/_taint_tracking/Aspects/Helpers.cpp | 14 +- .../_taint_tracking/tests/test_helpers.cpp | 167 +++++++----------- 2 files changed, 72 insertions(+), 109 deletions(-) diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp index da747daa8b9..9fc54d2af50 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp @@ -303,6 +303,7 @@ set_ranges_on_splitted(const py::object& source_str, RANGE_START offset = 0; auto c_source_str = py::cast(source_str); const auto separator_increase = static_cast(not include_separator); + RANGE_START separator_count = 0; for (const auto& item : split_result) { if (not is_text(item.ptr()) or py::len(item) == 0) { @@ -312,19 +313,19 @@ set_ranges_on_splitted(const py::object& source_str, TaintRangeRefs item_ranges; // Find the item in the source_str. - const auto start = static_cast(c_source_str.find(c_item, offset)); + auto start = static_cast(c_source_str.find(c_item, offset)); if (start == -1) { continue; } - const auto end = static_cast(start + c_item.length()); + + auto end = static_cast(start + c_item.length()); // Find what source_ranges match these positions and create a new range with the start and len updated. 
for (const auto& range : source_ranges) { - if (const auto range_end_abs = range->start + range->length; range->start < end && range_end_abs > start) { - // Create a new range with the updated start + const auto range_end_abs = range->start + range->length; + if (range->start + separator_count < end && range_end_abs > start) { const auto new_range_start = std::max(range->start - offset, 0L); - const auto new_range_length = - std::min(end - start, (range->length - std::max(0L, offset - range->start))); + const auto new_range_length = std::min(range->length, static_cast(py::len(item))); item_ranges.emplace_back( initializer->allocate_taint_range(new_range_start, new_range_length, range->source)); } @@ -335,6 +336,7 @@ set_ranges_on_splitted(const py::object& source_str, } offset += py::len(item) + separator_increase; + separator_count += 1; } return some_set; diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp index 20b031b293b..3ee2213ff47 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp @@ -544,50 +544,49 @@ TEST_F(SetRangesOnSplittedCheck, MultipleSplitsNoSeparator) } } -// JJJ bug -// TEST_F(SetRangesOnSplittedCheck, SplitWithSeparatorIncluded) -// { -// py::str source_str = "This|is|a|test|string."; -// py::list split_result; -// split_result.append(py::str("This")); -// split_result.append(py::str("is")); -// split_result.append(py::str("a")); -// split_result.append(py::str("test")); -// split_result.append(py::str("string.")); -// -// Source source1("source1", "sample_value1", OriginType::BODY); -// Source source2("source2", "sample_value2", OriginType::BODY); -// TaintRangeRefs source_ranges = { -// std::make_shared(0, 4, source1), // Taint "This" -// std::make_shared(10, 4, source2) // Taint "test" -// }; -// api_set_ranges(source_str, source_ranges); -// auto tx_map = 
Initializer::get_tainting_map(); -// -// bool result = set_ranges_on_splitted(source_str, source_ranges, split_result, tx_map, true); -// EXPECT_TRUE(result); -// -// // Check first split part "This" -// auto first = split_result[0]; -// auto first_ranges = get_ranges(first.ptr(), tx_map); -// EXPECT_EQ(first_ranges.first.size(), 1); -// EXPECT_EQ(first_ranges.first[0]->start, 0); -// EXPECT_EQ(first_ranges.first[0]->length, 4); -// -// // Check middle split part "test" -// auto test_part = split_result[3]; -// auto test_ranges = get_ranges(test_part.ptr(), tx_map); -// EXPECT_EQ(test_ranges.first.size(), 1); -// EXPECT_EQ(test_ranges.first[0]->start, 0); // Position within "test" -// EXPECT_EQ(test_ranges.first[0]->length, 4); -// -// // Check that other parts have no ranges -// for (int i : { 1, 2, 4 }) { -// auto part = split_result[i]; -// auto part_ranges = get_ranges(part.ptr(), tx_map); -// EXPECT_TRUE(part_ranges.first.empty()); -// } -// } +TEST_F(SetRangesOnSplittedCheck, SplitWithSeparatorIncluded) +{ + py::str source_str = "This|is|a|test|string."; + py::list split_result; + split_result.append(py::str("This")); + split_result.append(py::str("is")); + split_result.append(py::str("a")); + split_result.append(py::str("test")); + split_result.append(py::str("string.")); + + Source source1("source1", "sample_value1", OriginType::BODY); + Source source2("source2", "sample_value2", OriginType::BODY); + TaintRangeRefs source_ranges = { + std::make_shared(0, 4, source1), // Taint "This" + std::make_shared(7, 4, source2) // Taint "test" + }; + api_set_ranges(source_str, source_ranges); + auto tx_map = Initializer::get_tainting_map(); + + bool result = set_ranges_on_splitted(source_str, source_ranges, split_result, tx_map, true); + EXPECT_TRUE(result); + + // Check first split part "This" + auto first = split_result[0]; + auto first_ranges = get_ranges(first.ptr(), tx_map); + EXPECT_EQ(first_ranges.first.size(), 1); + EXPECT_EQ(first_ranges.first[0]->start, 0); + 
EXPECT_EQ(first_ranges.first[0]->length, 4); + + // Check middle split part "test" + auto test_part = split_result[3]; + auto test_ranges = get_ranges(test_part.ptr(), tx_map); + EXPECT_EQ(test_ranges.first.size(), 1); + EXPECT_EQ(test_ranges.first[0]->start, 0); // Position within "test" + EXPECT_EQ(test_ranges.first[0]->length, 4); + + // Check that other parts have no ranges + for (int i : { 1, 2, 4 }) { + auto part = split_result[i]; + auto part_ranges = get_ranges(part.ptr(), tx_map); + EXPECT_TRUE(part_ranges.first.empty()); + } +} TEST_F(SetRangesOnSplittedCheck, EmptyRanges) { @@ -638,69 +637,31 @@ TEST_F(ProcessFlagAddedArgsTest, AddedArgsOriginalNone) EXPECT_EQ(result, args.ptr()); } -// JJJ bug // Test with added args, original function is custom -// TEST_F(ProcessFlagAddedArgsTest, AddedArgsOriginalCustomFunction) -// { -// PyObject* orig_function = Py_None; -// py::object custom_function = py::cpp_function([](py::tuple args, py::dict kwargs) { return args[0]; }); -// orig_function = custom_function.ptr(); -// -// int flag_added_args = 1; -// py::tuple args = py::make_tuple("arg1", "arg2", "added_arg"); -// py::dict kwargs; -// -// PyObject* result = process_flag_added_args(orig_function, flag_added_args, args.ptr(), kwargs.ptr()); -// -// // Only "arg1" and "arg2" should be passed to the original function (sliced) -// py::tuple expected_result = py::make_tuple("arg1", "arg2"); -// EXPECT_EQ(py::reinterpret_borrow(result), expected_result); -// } - -// Test with added args, original function is built-in -// JJJ -// TEST_F(ProcessFlagAddedArgsTest, AddedArgsOriginalBuiltinFunction) -// { -// PyObject* orig_function = (PyObject*)&PyUnicode_Type; // Use the built-in unicode function as an example -// int flag_added_args = 1; -// py::tuple args = py::make_tuple("arg1", "arg2", "added_arg"); -// py::dict kwargs; -// -// PyObject* result = process_flag_added_args(orig_function, flag_added_args, args.ptr(), kwargs.ptr()); -// -// // Should return the full 
argument list because built-in functions are exempt -// EXPECT_EQ(result, args.ptr()); -// } - -// JJJ bug -// Test with no added args, original function is custom -// TEST_F(ProcessFlagAddedArgsTest, NoAddedArgsOriginalCustomFunction) -// { -// py::object custom_function = py::cpp_function([](py::tuple args, py::dict kwargs) { return args[0]; }); -// PyObject* orig_function = custom_function.ptr(); -// -// int flag_added_args = 0; -// py::tuple args = py::make_tuple("arg1", "arg2"); -// py::dict kwargs; -// -// PyObject* result = process_flag_added_args(orig_function, flag_added_args, args.ptr(), kwargs.ptr()); -// -// // Since there are no added args, all args should be passed to the custom function -// py::tuple expected_result = py::make_tuple("arg1", "arg2"); -// EXPECT_EQ(py::reinterpret_borrow(result), expected_result); -// } - -// JJJ bug -// Test with built-in str function and added args -TEST_F(ProcessFlagAddedArgsTest, BuiltInFunctionWithAddedArgs) -{ - PyObject* orig_function = (PyObject*)&PyUnicode_Type; // Use the built-in str function +TEST_F(ProcessFlagAddedArgsTest, AddedArgsOriginalCustomFunction) +{ + PyObject* orig_function = Py_None; + py::object custom_function = py::cpp_function([](py::str arg1, py::str arg2) { return arg1; }); + orig_function = custom_function.ptr(); + int flag_added_args = 1; py::tuple args = py::make_tuple("arg1", "arg2", "added_arg"); py::dict kwargs; PyObject* result = process_flag_added_args(orig_function, flag_added_args, args.ptr(), kwargs.ptr()); + EXPECT_STREQ(AnyTextObjectToString(py::reinterpret_borrow(result)).c_str(), "arg2"); +} - // Since built-in functions are exempt, it should return the full argument list - EXPECT_EQ(result, args.ptr()); -} \ No newline at end of file +// Test with no added args, original function is custom +TEST_F(ProcessFlagAddedArgsTest, NoAddedArgsOriginalCustomFunction) +{ + py::object custom_function = py::cpp_function([](py::str arg1, py::str arg2) { return arg1; }); + PyObject* 
orig_function = custom_function.ptr(); + + int flag_added_args = 0; + py::tuple args = py::make_tuple("arg1", "arg2"); + py::dict kwargs; + + PyObject* result = process_flag_added_args(orig_function, flag_added_args, args.ptr(), kwargs.ptr()); + EXPECT_STREQ(AnyTextObjectToString(py::reinterpret_borrow(result)).c_str(), "arg1"); +} From 1c3b7e07687645ea64024ed1d874b22d909c6080 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Wed, 11 Sep 2024 18:48:23 +0200 Subject: [PATCH 11/24] Fix basename aspect. Simplify set_ranges_on_splitted Signed-off-by: Juanjo Alvarez --- .../_taint_tracking/Aspects/AspectsOsPath.cpp | 2 +- .../_iast/_taint_tracking/Aspects/Helpers.cpp | 45 +++++++++---------- .../_taint_tracking/tests/test_helpers.cpp | 12 ++--- .../iast/aspects/test_aspect_helpers.py | 14 +++--- 4 files changed, 36 insertions(+), 37 deletions(-) diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectsOsPath.cpp b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectsOsPath.cpp index 4fb79ceee24..52c53b90216 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectsOsPath.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectsOsPath.cpp @@ -118,7 +118,7 @@ api_ospathbasename_aspect(const StrType& path) // Create a fake list to call set_ranges_on_splitted on it (we are // only interested on the last path, which is the basename result) - auto prev_path_len = py::len(path) - py::len(result_o); + auto prev_path_len = py::len(path) - py::len(result_o) - 1; std::string filler(prev_path_len, 'X'); py::str filler_str(filler); py::list apply_list; diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp index 9fc54d2af50..fd9c403fe70 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp @@ -292,51 +292,50 @@ set_ranges_on_splitted(const py::object& source_str, const TaintRangeMapTypePtr& tx_map, bool 
include_separator) { + RANGE_START offset = 0; bool some_set = false; - // Some quick shortcuts if (source_ranges.empty() or py::len(split_result) == 0 or py::len(source_str) == 0 or not tx_map or tx_map->empty()) { return false; } - RANGE_START offset = 0; - auto c_source_str = py::cast(source_str); - const auto separator_increase = static_cast(not include_separator); - RANGE_START separator_count = 0; - for (const auto& item : split_result) { + cerr << "JJ ITEM: ============================ " << item << endl; if (not is_text(item.ptr()) or py::len(item) == 0) { continue; } - auto c_item = py::cast(item); TaintRangeRefs item_ranges; + RANGE_START part_len = py::len(item); + RANGE_START part_start = offset; + RANGE_START part_end = part_start + part_len; - // Find the item in the source_str. - auto start = static_cast(c_source_str.find(c_item, offset)); - if (start == -1) { - continue; - } + // bool first = true; + for (const auto& range : source_ranges) { + RANGE_START range_start = range->start; + RANGE_START range_end = range->start + range->length; - auto end = static_cast(start + c_item.length()); + // Check for overlap + if (range_start < part_end && range_end > part_start) { + RANGE_START new_start = std::max(range_start - part_start, 0L); + RANGE_START new_end = std::min(range_end - part_start, part_len); + RANGE_START new_length = std::min(new_end - new_start, part_len); - // Find what source_ranges match these positions and create a new range with the start and len updated. 
- for (const auto& range : source_ranges) { - const auto range_end_abs = range->start + range->length; - if (range->start + separator_count < end && range_end_abs > start) { - const auto new_range_start = std::max(range->start - offset, 0L); - const auto new_range_length = std::min(range->length, static_cast(py::len(item))); - item_ranges.emplace_back( - initializer->allocate_taint_range(new_range_start, new_range_length, range->source)); + if (new_length > 0) { + item_ranges.emplace_back(initializer->allocate_taint_range(new_start, new_length, range->source)); + } } } + if (not item_ranges.empty()) { set_ranges(item.ptr(), item_ranges, tx_map); some_set = true; } + offset += part_len; - offset += py::len(item) + separator_increase; - separator_count += 1; + if (!include_separator) { + offset += 1; + } } return some_set; diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp index 3ee2213ff47..15386780274 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp @@ -549,16 +549,16 @@ TEST_F(SetRangesOnSplittedCheck, SplitWithSeparatorIncluded) py::str source_str = "This|is|a|test|string."; py::list split_result; split_result.append(py::str("This")); - split_result.append(py::str("is")); - split_result.append(py::str("a")); - split_result.append(py::str("test")); - split_result.append(py::str("string.")); + split_result.append(py::str("|is")); + split_result.append(py::str("|a")); + split_result.append(py::str("|test")); + split_result.append(py::str("|string.")); Source source1("source1", "sample_value1", OriginType::BODY); Source source2("source2", "sample_value2", OriginType::BODY); TaintRangeRefs source_ranges = { std::make_shared(0, 4, source1), // Taint "This" - std::make_shared(7, 4, source2) // Taint "test" + std::make_shared(10, 4, source2) // Taint "test" }; api_set_ranges(source_str, 
source_ranges); auto tx_map = Initializer::get_tainting_map(); @@ -577,7 +577,7 @@ TEST_F(SetRangesOnSplittedCheck, SplitWithSeparatorIncluded) auto test_part = split_result[3]; auto test_ranges = get_ranges(test_part.ptr(), tx_map); EXPECT_EQ(test_ranges.first.size(), 1); - EXPECT_EQ(test_ranges.first[0]->start, 0); // Position within "test" + EXPECT_EQ(test_ranges.first[0]->start, 1); // Position within "test" EXPECT_EQ(test_ranges.first[0]->length, 4); // Check that other parts have no ranges diff --git a/tests/appsec/iast/aspects/test_aspect_helpers.py b/tests/appsec/iast/aspects/test_aspect_helpers.py index 7e8a5a41230..d0ca09c4482 100644 --- a/tests/appsec/iast/aspects/test_aspect_helpers.py +++ b/tests/appsec/iast/aspects/test_aspect_helpers.py @@ -168,10 +168,10 @@ def test_set_ranges_on_splitted_ospathsplit(): def test_set_ranges_on_splitted_ospathsplitext(): s = "abc/efgh/jkl.txt" - range1 = _build_sample_range(0, 3, s[0:2]) - range2 = _build_sample_range(4, 4, s[4:8]) - range3 = _build_sample_range(9, 3, s[9:12]) - range4 = _build_sample_range(13, 4, s[13:17]) + range1 = _build_sample_range(0, 3, s[0:2]) # abc + range2 = _build_sample_range(4, 4, s[4:8]) # efgh + range3 = _build_sample_range(9, 3, s[9:12]) # jkl + range4 = _build_sample_range(13, 4, s[13:17]) # txt set_ranges(s, (range1, range2, range3, range4)) ranges = get_ranges(s) assert ranges @@ -185,7 +185,7 @@ def test_set_ranges_on_splitted_ospathsplitext(): TaintRange(9, 3, Source("jkl", "sample_value", OriginType.PARAMETER)), ] assert get_ranges(parts[1]) == [ - TaintRange(1, 4, Source("txt", "sample_value", OriginType.PARAMETER)), + TaintRange(1, 3, Source("txt", "sample_value", OriginType.PARAMETER)), ] @@ -426,7 +426,7 @@ def test_set_ranges_on_splitted_bytearray() -> None: assert get_ranges(parts[0]) == [TaintRange(0, 2, Source("ab", "sample_value", OriginType.PARAMETER))] assert get_ranges(parts[1]) == [ TaintRange(1, 1, Source("f", "sample_value", OriginType.PARAMETER)), - TaintRange(1, 
4, Source("second", "sample_value", OriginType.PARAMETER)), + TaintRange(1, 3, Source("second", "sample_value", OriginType.PARAMETER)), ] assert get_ranges(parts[2]) == [TaintRange(0, 2, Source("third", "sample_value", OriginType.PARAMETER))] @@ -443,4 +443,4 @@ def test_set_ranges_on_splitted_wrong_args(): parts = ["123", 45] set_ranges_on_splitted(s, ranges, parts) ranges = get_ranges(parts[0]) - assert ranges == [TaintRange(1, 3, Source("123", "sample_value", OriginType.PARAMETER))] + assert ranges == [TaintRange(1, 2, Source("123", "sample_value", OriginType.PARAMETER))] From 0c31b8cea7e418f99daa79e7f7fb4c230de88cfa Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 12 Sep 2024 12:36:32 +0200 Subject: [PATCH 12/24] Add native tests to hatch and gitlab Signed-off-by: Juanjo Alvarez --- .gitlab/tests/appsec.yml | 6 +++++ .../_iast/_taint_tracking/Aspects/Helpers.cpp | 1 - ddtrace/appsec/_iast/_taint_tracking/clean.sh | 4 ++++ hatch.toml | 23 +++++++++++++++++++ 4 files changed, 33 insertions(+), 1 deletion(-) diff --git a/.gitlab/tests/appsec.yml b/.gitlab/tests/appsec.yml index fb17a41b7f5..0d20b67a551 100644 --- a/.gitlab/tests/appsec.yml +++ b/.gitlab/tests/appsec.yml @@ -51,3 +51,9 @@ appsec threats fastapi: variables: SUITE_NAME: "appsec_threats_fastapi" retry: 2 + +appsec iast native: + extends: .test_base_hatch + variables: + SUITE_NAME: "appsec_iast_native" + retry: 2 diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp index fd9c403fe70..760312d958c 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp @@ -301,7 +301,6 @@ set_ranges_on_splitted(const py::object& source_str, } for (const auto& item : split_result) { - cerr << "JJ ITEM: ============================ " << item << endl; if (not is_text(item.ptr()) or py::len(item) == 0) { continue; } diff --git 
a/ddtrace/appsec/_iast/_taint_tracking/clean.sh b/ddtrace/appsec/_iast/_taint_tracking/clean.sh index 9542882c81e..32815fec547 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/clean.sh +++ b/ddtrace/appsec/_iast/_taint_tracking/clean.sh @@ -3,5 +3,9 @@ set -exu #cd -- "$(dirname -- "${BASH_SOURCE[0]}")" || exit rm -rf CMakeFiles/ CMakeCache.txt Makefile cmake_install.cmake __pycache__/ .cmake *.cbp Testing +rm -rf tests/CMakeFiles/ tests/CMakeCache.txt tests/Makefile tests/cmake_install.cmake tests/__pycache__/ tests/.cmake *.cbp rm -rf cmake-build-debug cmake-build-default cmake-build-tests +rm -rf tests/cmake-build-debug tests/cmake-build-default tests/cmake-build-tests yes|rm -f *.so +yes|rm -f tests/*.so +yes|rm -f tests/native_tests diff --git a/hatch.toml b/hatch.toml index 16b90392797..31fc86501b3 100644 --- a/hatch.toml +++ b/hatch.toml @@ -264,6 +264,29 @@ flask = ["~=2.3"] python = ["3.8", "3.10", "3.12"] flask = ["~=3.0"] +## ASM Native IAST module + +[envs.appsec_iast_native] +template = "appsec_iast_native" +dependencies = [ + "cmake", + "pybind11", + "clang" +] + +[envs.appsec_iast_native.env-vars] +CMAKE_BUILD_PARALELL_LEVEL = "12" + +[envs.appsec_iast_native.scripts] +test = [ + "cmake -DCMAKE_BUILD_TYPE=Debug -DPYTHON_EXECUTABLE=python ddtrace/appsec/_iast/_taint_tracking/CMakeLists.txt", + "make -j -f ddtrace/appsec/_iast/_taint_tracking/tests/Makefile native_tests", + "ddtrace/appsec/_iast/_taint_tracking/tests/native_tests" +] + +[[envs.appsec_iast_native.matrix]] +python = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + ## ASM FastAPI [envs.appsec_threats_fastapi] From 65395dc781af4551df29ceba7d808d1d5c6904c1 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 12 Sep 2024 12:39:52 +0200 Subject: [PATCH 13/24] yaml fix Signed-off-by: Juanjo Alvarez --- .gitlab/tests/appsec.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab/tests/appsec.yml b/.gitlab/tests/appsec.yml index 0d20b67a551..082da8aba9f 100644 ---
a/.gitlab/tests/appsec.yml +++ b/.gitlab/tests/appsec.yml @@ -54,6 +54,7 @@ appsec threats fastapi: appsec iast native: extends: .test_base_hatch + parallel: 9 variables: SUITE_NAME: "appsec_iast_native" retry: 2 From aa37aa217a5149834dcde8e418a209c4bbc105a6 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 12 Sep 2024 14:31:36 +0200 Subject: [PATCH 14/24] fix Signed-off-by: Juanjo Alvarez --- ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectsOsPath.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectsOsPath.cpp b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectsOsPath.cpp index 52c53b90216..4fb79ceee24 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectsOsPath.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectsOsPath.cpp @@ -118,7 +118,7 @@ api_ospathbasename_aspect(const StrType& path) // Create a fake list to call set_ranges_on_splitted on it (we are // only interested on the last path, which is the basename result) - auto prev_path_len = py::len(path) - py::len(result_o) - 1; + auto prev_path_len = py::len(path) - py::len(result_o); std::string filler(prev_path_len, 'X'); py::str filler_str(filler); py::list apply_list; From a6c91baf75fa595c10532fd3590208968e2584ac Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 12 Sep 2024 14:40:55 +0200 Subject: [PATCH 15/24] fix Signed-off-by: Juanjo Alvarez --- ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectsOsPath.cpp | 2 +- tests/appsec/iast/aspects/test_ospath_aspects.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectsOsPath.cpp b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectsOsPath.cpp index 4fb79ceee24..b1bbafb0317 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectsOsPath.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectsOsPath.cpp @@ -125,7 +125,7 @@ api_ospathbasename_aspect(const StrType& path) 
apply_list.append(filler_str); apply_list.append(result_o); - set_ranges_on_splitted(path, ranges, apply_list, tx_map, false); + set_ranges_on_splitted(path, ranges, apply_list, tx_map, true); return apply_list[1]; }); } diff --git a/tests/appsec/iast/aspects/test_ospath_aspects.py b/tests/appsec/iast/aspects/test_ospath_aspects.py index 976327cdd2c..295820ea801 100644 --- a/tests/appsec/iast/aspects/test_ospath_aspects.py +++ b/tests/appsec/iast/aspects/test_ospath_aspects.py @@ -233,6 +233,7 @@ def test_ospathbasename_tainted_normal(): res = ospathbasename_aspect(tainted_foobarbaz) assert res == "baz" + print(get_tainted_ranges(res)) assert get_tainted_ranges(res) == [TaintRange(0, 3, Source("test_ospath", "/foo/bar/baz", OriginType.PARAMETER))] From 5075bcf85b96b93081890269367f46942b76486e Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 12 Sep 2024 15:09:55 +0200 Subject: [PATCH 16/24] Add native suite to suitespec.json Signed-off-by: Juanjo Alvarez --- tests/.suitespec.json | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/.suitespec.json b/tests/.suitespec.json index c4dc9138ecb..efe0656a8f8 100644 --- a/tests/.suitespec.json +++ b/tests/.suitespec.json @@ -629,6 +629,14 @@ "@remoteconfig", "tests/appsec/iast/*" ], + "appsec_iast_native": [ + "@bootstrap", + "@core", + "@tracing", + "@appsec", + "@appsec_iast", + "@remoteconfig" + ], "appsec_iast_memcheck": [ "@bootstrap", "@core", From 4ad0f858e39cb765207849867f19f85fa74a583e Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 12 Sep 2024 15:44:03 +0200 Subject: [PATCH 17/24] fix cmake command Signed-off-by: Juanjo Alvarez --- .gitlab/tests/appsec.yml | 2 +- hatch.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab/tests/appsec.yml b/.gitlab/tests/appsec.yml index 082da8aba9f..7794b987352 100644 --- a/.gitlab/tests/appsec.yml +++ b/.gitlab/tests/appsec.yml @@ -54,7 +54,7 @@ appsec threats fastapi: appsec iast native: extends: .test_base_hatch - 
parallel: 9 + parallel: 6 variables: SUITE_NAME: "appsec_iast_native" retry: 2 diff --git a/hatch.toml b/hatch.toml index 31fc86501b3..f19ca03a43a 100644 --- a/hatch.toml +++ b/hatch.toml @@ -279,7 +279,7 @@ CMAKE_BUILD_PARALELL_LEVEL = "12" [envs.appsec_iast_native.scripts] test = [ - "cmake -DCMAKE_BUILD_TYPE=Debug -DPYTHON_EXECUTABLE=python ddtrace/appsec/_iast/_taint_tracking/CMakeLists.txt", + "cmake -DCMAKE_BUILD_TYPE=Debug -DPYTHON_EXECUTABLE=python -S ddtrace/appsec/_iast/_taint_tracking -B ddtrace/appsec/_iast/_taint_tracking", "make -j -f ddtrace/appsec/_iast/_taint_tracking/tests/Makefile native_tests", "ddtrace/appsec/_iast/_taint_tracking/tests/native_tests" ] From 07519196c2112ae3e2c070766c1263246f0d0c9d Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 12 Sep 2024 16:05:56 +0200 Subject: [PATCH 18/24] hatch change Signed-off-by: Juanjo Alvarez --- hatch.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hatch.toml b/hatch.toml index f19ca03a43a..07925b9022b 100644 --- a/hatch.toml +++ b/hatch.toml @@ -280,7 +280,7 @@ CMAKE_BUILD_PARALELL_LEVEL = "12" [envs.appsec_iast_native.scripts] test = [ "cmake -DCMAKE_BUILD_TYPE=Debug -DPYTHON_EXECUTABLE=python -S ddtrace/appsec/_iast/_taint_tracking -B ddtrace/appsec/_iast/_taint_tracking", - "make -j -f ddtrace/appsec/_iast/_taint_tracking/tests/Makefile native_tests", + "make -j -f ddtrace/appsec/_iast/_taint_tracking/tests/Makefile ddtrace/appsec/_iast/_taint_tracking/tests/native_tests", "ddtrace/appsec/_iast/_taint_tracking/tests/native_tests" ] From 21f93e5f944794eeef1612b775ad3fbc0459a110 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 12 Sep 2024 16:07:48 +0200 Subject: [PATCH 19/24] hatch change Signed-off-by: Juanjo Alvarez --- .gitlab/tests/appsec.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab/tests/appsec.yml b/.gitlab/tests/appsec.yml index 7794b987352..94686d487f6 100644 --- a/.gitlab/tests/appsec.yml +++ 
b/.gitlab/tests/appsec.yml @@ -54,7 +54,7 @@ appsec threats fastapi: appsec iast native: extends: .test_base_hatch - parallel: 6 + parallel: 3 variables: SUITE_NAME: "appsec_iast_native" retry: 2 From 8416a3d2a5d5efb4486e929a2b135a794b7e3575 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 12 Sep 2024 16:15:01 +0200 Subject: [PATCH 20/24] hatch change Signed-off-by: Juanjo Alvarez --- hatch.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hatch.toml b/hatch.toml index 07925b9022b..f19ca03a43a 100644 --- a/hatch.toml +++ b/hatch.toml @@ -280,7 +280,7 @@ CMAKE_BUILD_PARALELL_LEVEL = "12" [envs.appsec_iast_native.scripts] test = [ "cmake -DCMAKE_BUILD_TYPE=Debug -DPYTHON_EXECUTABLE=python -S ddtrace/appsec/_iast/_taint_tracking -B ddtrace/appsec/_iast/_taint_tracking", - "make -j -f ddtrace/appsec/_iast/_taint_tracking/tests/Makefile ddtrace/appsec/_iast/_taint_tracking/tests/native_tests", + "make -j -f ddtrace/appsec/_iast/_taint_tracking/tests/Makefile native_tests", "ddtrace/appsec/_iast/_taint_tracking/tests/native_tests" ] From a860ddb315ea6ccdbb439064cddd5f5bb9d3ba94 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 12 Sep 2024 16:22:59 +0200 Subject: [PATCH 21/24] hatch change Signed-off-by: Juanjo Alvarez --- hatch.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/hatch.toml b/hatch.toml index f19ca03a43a..b8fc22bd0bf 100644 --- a/hatch.toml +++ b/hatch.toml @@ -274,9 +274,6 @@ dependencies = [ "clang" ] -[envs.appsec_iast_native.env-vars] -CMAKE_BUILD_PARALELL_LEVEL = "12" - [envs.appsec_iast_native.scripts] test = [ "cmake -DCMAKE_BUILD_TYPE=Debug -DPYTHON_EXECUTABLE=python -S ddtrace/appsec/_iast/_taint_tracking -B ddtrace/appsec/_iast/_taint_tracking", From 6fe38ed98619a16171055cc4496e183c98ea0184 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 12 Sep 2024 16:23:38 +0200 Subject: [PATCH 22/24] hatch change Signed-off-by: Juanjo Alvarez --- hatch.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/hatch.toml b/hatch.toml index b8fc22bd0bf..594a87dde9b 100644 --- a/hatch.toml +++ b/hatch.toml @@ -277,7 +277,7 @@ dependencies = [ [envs.appsec_iast_native.scripts] test = [ "cmake -DCMAKE_BUILD_TYPE=Debug -DPYTHON_EXECUTABLE=python -S ddtrace/appsec/_iast/_taint_tracking -B ddtrace/appsec/_iast/_taint_tracking", - "make -j -f ddtrace/appsec/_iast/_taint_tracking/tests/Makefile native_tests", + "make -f ddtrace/appsec/_iast/_taint_tracking/tests/Makefile native_tests", "ddtrace/appsec/_iast/_taint_tracking/tests/native_tests" ] From f7f7158a343851fe3f3d1f1d7a1d886753b7996b Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 12 Sep 2024 16:34:19 +0200 Subject: [PATCH 23/24] hatch change Signed-off-by: Juanjo Alvarez --- .gitlab/tests/appsec.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab/tests/appsec.yml b/.gitlab/tests/appsec.yml index 94686d487f6..7794b987352 100644 --- a/.gitlab/tests/appsec.yml +++ b/.gitlab/tests/appsec.yml @@ -54,7 +54,7 @@ appsec threats fastapi: appsec iast native: extends: .test_base_hatch - parallel: 3 + parallel: 6 variables: SUITE_NAME: "appsec_iast_native" retry: 2 From 018fe975c8378f64aebde464371ad22db381d105 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 12 Sep 2024 16:38:56 +0200 Subject: [PATCH 24/24] Remove debug stuff Signed-off-by: Juanjo Alvarez --- tests/appsec/iast/aspects/test_ospath_aspects.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/appsec/iast/aspects/test_ospath_aspects.py b/tests/appsec/iast/aspects/test_ospath_aspects.py index 295820ea801..976327cdd2c 100644 --- a/tests/appsec/iast/aspects/test_ospath_aspects.py +++ b/tests/appsec/iast/aspects/test_ospath_aspects.py @@ -233,7 +233,6 @@ def test_ospathbasename_tainted_normal(): res = ospathbasename_aspect(tainted_foobarbaz) assert res == "baz" - print(get_tainted_ranges(res)) assert get_tainted_ranges(res) == [TaintRange(0, 3, Source("test_ospath", "/foo/bar/baz", OriginType.PARAMETER))]