Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(iast): add google testing for the native IAST module #10600

Merged
merged 27 commits into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
be2d572
Add google testing for the native IAST module
juanjux Sep 10, 2024
b62725b
Link unittests to pybind11 and Python
juanjux Sep 10, 2024
cd3e5e1
Added another test file, some fixes
juanjux Sep 10, 2024
131f43d
Complete stringutils unittests
juanjux Sep 10, 2024
8f81334
checkpoint
juanjux Sep 10, 2024
f50cbf3
checkpoint
juanjux Sep 10, 2024
e8871ea
more tests and fixes
juanjux Sep 11, 2024
4a380b7
Added remaining tests for Helper.cpp
juanjux Sep 11, 2024
49ce464
checkpoint
juanjux Sep 11, 2024
d86db93
Fix issue on set_ranges_on_splitted
juanjux Sep 11, 2024
1c3b7e0
Fix basename aspect. Simplify set_ranges_on_splitted
juanjux Sep 11, 2024
3a1816b
Merge branch 'main' into juanjux/APPSEC-54599-native-unittesting
juanjux Sep 12, 2024
0c31b8c
Add native tests to hatch and gitlab
juanjux Sep 12, 2024
65395dc
yaml fix
juanjux Sep 12, 2024
aa37aa2
fix
juanjux Sep 12, 2024
a6c91ba
fix
juanjux Sep 12, 2024
5075bcf
Add native suite to suitespec.json
juanjux Sep 12, 2024
4ad0f85
fix cmake command
juanjux Sep 12, 2024
0751919
hatch change
juanjux Sep 12, 2024
21f93e5
hatch change
juanjux Sep 12, 2024
8416a3d
hatch change
juanjux Sep 12, 2024
a860ddb
hatch change
juanjux Sep 12, 2024
6fe38ed
hatch change
juanjux Sep 12, 2024
f7f7158
hatch change
juanjux Sep 12, 2024
018fe97
Remove debug stuff
juanjux Sep 12, 2024
d603d78
Merge branch 'main' into juanjux/APPSEC-54599-native-unittesting
juanjux Sep 13, 2024
36260bd
Merge branch 'main' into juanjux/APPSEC-54599-native-unittesting
juanjux Sep 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .gitlab/tests/appsec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,10 @@ appsec threats fastapi:
variables:
SUITE_NAME: "appsec_threats_fastapi"
retry: 2

# CI job "appsec iast native": selects the suite named "appsec_iast_native".
# It inherits its base configuration from the hidden job `.test_base_hatch`.
# NOTE(review): presumably this runs the native IAST unit tests — confirm against the suite definition.
appsec iast native:
extends: .test_base_hatch
# Number of parallel instances of this job.
parallel: 6
variables:
# Suite identifier handed to the inherited job through the environment.
SUITE_NAME: "appsec_iast_native"
# Failed runs are retried up to this many times.
retry: 2
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ api_ospathbasename_aspect(const StrType& path)
apply_list.append(filler_str);
apply_list.append(result_o);

set_ranges_on_splitted(path, ranges, apply_list, tx_map, false);
set_ranges_on_splitted(path, ranges, apply_list, tx_map, true);
return apply_list[1];
});
}
Expand Down
136 changes: 37 additions & 99 deletions ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
#include "Initializer/Initializer.h"
#include <algorithm>
#include <iostream>
#include <regex>

using namespace pybind11::literals;
namespace py = pybind11;
Expand Down Expand Up @@ -44,7 +43,11 @@ as_formatted_evidence(const string& text,
const optional<TagMappingMode>& tag_mapping_mode,
const optional<const py::dict>& new_ranges)
{
if (text_ranges.empty()) {
if (const auto tx_map = Initializer::get_tainting_map(); !tx_map) {
return text;
}

if (text_ranges.empty() or text.empty()) {
return text;
}
vector<string> res_vector;
Expand All @@ -55,20 +58,23 @@ as_formatted_evidence(const string& text,

for (const auto& taint_range : text_ranges) {
string content;
if (!tag_mapping_mode) {
if (!tag_mapping_mode or tag_mapping_mode.value() == TagMappingMode::Normal) {
content = get_default_content(taint_range);
} else
switch (*tag_mapping_mode) {
case TagMappingMode::Mapper:
case TagMappingMode::Mapper: {
content = to_string(taint_range->get_hash());
break;
case TagMappingMode::Mapper_Replace:
}
case TagMappingMode::Mapper_Replace: {
content = mapper_replace(taint_range, new_ranges);
break;
}
default: {
// Nothing
}
}

const auto tag = get_tag(content);

const auto range_end = taint_range->start + taint_range->length;
Expand All @@ -91,28 +97,17 @@ as_formatted_evidence(const string& text,
return oss.str();
}

template<class StrType>
StrType
all_as_formatted_evidence(const StrType& text, TagMappingMode tag_mapping_mode)
{
TaintRangeRefs text_ranges = api_get_ranges(text);
return StrType(as_formatted_evidence(AnyTextObjectToString(text), text_ranges, tag_mapping_mode, nullopt));
}

// Forwards the string form of `text` and the caller-supplied `text_ranges`
// to as_formatted_evidence(). No replacement ranges are supplied (nullopt).
//
// @param text              Text object to format.
// @param text_ranges       Ranges provided by the caller; passed through as-is.
// @param tag_mapping_mode  Mapping mode forwarded unchanged.
// @return The formatted result wrapped back into `StrType`.
template<class StrType>
StrType
int_as_formatted_evidence(const StrType& text, TaintRangeRefs& text_ranges, TagMappingMode tag_mapping_mode)
{
    const auto plain_text = AnyTextObjectToString(text);

    return StrType(as_formatted_evidence(plain_text, text_ranges, tag_mapping_mode, nullopt));
}

template<class StrType>
StrType
api_as_formatted_evidence(const StrType& text,
optional<const TaintRangeRefs>& text_ranges,
const optional<TagMappingMode>& tag_mapping_mode,
const optional<const py::dict>& new_ranges)
{
if (const auto tx_map = Initializer::get_tainting_map(); !tx_map) {
return text;
}

TaintRangeRefs _ranges;
if (!text_ranges) {
_ranges = api_get_ranges(text);
Expand All @@ -122,20 +117,6 @@ api_as_formatted_evidence(const StrType& text,
return StrType(as_formatted_evidence(AnyTextObjectToString(text), _ranges, tag_mapping_mode, new_ranges));
}

// Splits `str_to_split` around taint markers, keeping the markers themselves.
//
// A marker is either an opening `:+-` optionally followed by a `<tag>`, or a
// closing `-+:` optionally preceded by a `<tag>`, where the tag body consists
// of digits, lowercase letters, '.' and '-'.
//
// @param str_to_split  Text possibly containing taint markers.
// @return The pieces in input order: each stretch of text preceding a marker,
//         followed by that marker, and finally any text left after the last
//         marker. A string without markers comes back as a single element.
std::vector<std::string>
split_taints(const std::string& str_to_split)
{
    // Compiling a std::regex is expensive and the pattern never changes, so
    // it is built once and reused across calls.
    static const std::regex taint_marker_rgx(R"((:\+-(<[0-9.a-z\-]+>)?|(<[0-9.a-z\-]+>)?-\+:))");

    // Sub-match -1 selects the text between matches, 0 the whole match.
    std::sregex_token_iterator token(str_to_split.begin(), str_to_split.end(), taint_marker_rgx, { -1, 0 });
    const std::sregex_token_iterator no_more_tokens;

    std::vector<std::string> pieces;
    for (; token != no_more_tokens; ++token) {
        pieces.emplace_back(token->str());
    }
    return pieces;
}

py::bytearray
api_convert_escaped_text_to_taint_text(const py::bytearray& taint_escaped_text, const TaintRangeRefs& ranges_orig)
{
Expand Down Expand Up @@ -200,22 +181,6 @@ api_convert_escaped_text_to_taint_text(PyObject* taint_escaped_text,
}
}

// Parses the leading base-10 unsigned integer contained in `s`.
//
// @param s  Text to parse; std::stoul rules apply (leading whitespace is
//           skipped and characters after the number are ignored).
// @return The parsed value, or the failure sentinel described below when `s`
//         holds no number or the number does not fit in `unsigned long`.
unsigned long int
getNum(const std::string& s)
{
    // Failure sentinel: `static_cast<unsigned int>(-1)` widened to the return
    // type, i.e. the value this function has always reported on error.
    // NOTE(review): callers may compare against this exact value — confirm
    // before changing it.
    constexpr unsigned long int kInvalid = static_cast<unsigned int>(-1);

    try {
        // The result is returned at full width; storing it in an
        // `unsigned int` first would silently truncate large values.
        return std::stoul(s, nullptr, 10);
    } catch (const std::exception&) {
        // std::stoul reports failures by throwing (std::invalid_argument /
        // std::out_of_range), so no errno inspection is needed. No Python
        // error indicator is set on this path, hence PyErr_Print() must not
        // be called here.
        return kInvalid;
    }
}

template<class StrType>
std::tuple<StrType, TaintRangeRefs>
convert_escaped_text_to_taint_text(const StrType& taint_escaped_text, const TaintRangeRefs& ranges_orig)
Expand Down Expand Up @@ -327,49 +292,49 @@ set_ranges_on_splitted(const py::object& source_str,
const TaintRangeMapTypePtr& tx_map,
bool include_separator)
{
RANGE_START offset = 0;
bool some_set = false;

// Some quick shortcuts
if (source_ranges.empty() or py::len(split_result) == 0 or py::len(source_str) == 0 or not tx_map or
tx_map->empty()) {
return false;
}

RANGE_START offset = 0;
auto c_source_str = py::cast<std::string>(source_str);
const auto separator_increase = static_cast<int>(not include_separator);

for (const auto& item : split_result) {
if (not is_text(item.ptr()) or py::len(item) == 0) {
continue;
}
auto c_item = py::cast<std::string>(item);
TaintRangeRefs item_ranges;
RANGE_START part_len = py::len(item);
RANGE_START part_start = offset;
RANGE_START part_end = part_start + part_len;

// Find the item in the source_str.
const auto start = static_cast<RANGE_START>(c_source_str.find(c_item, offset));
if (start == -1) {
continue;
}
const auto end = static_cast<RANGE_START>(start + c_item.length());

// Find what source_ranges match these positions and create a new range with the start and len updated.
// bool first = true;
for (const auto& range : source_ranges) {
if (const auto range_end_abs = range->start + range->length; range->start < end && range_end_abs > start) {
// Create a new range with the updated start
const auto new_range_start = std::max(range->start - offset, 0L);
const auto new_range_length =
std::min(end - start, (range->length - std::max(0L, offset - range->start)));
item_ranges.emplace_back(
initializer->allocate_taint_range(new_range_start, new_range_length, range->source));
RANGE_START range_start = range->start;
RANGE_START range_end = range->start + range->length;

// Check for overlap
if (range_start < part_end && range_end > part_start) {
RANGE_START new_start = std::max(range_start - part_start, 0L);
RANGE_START new_end = std::min(range_end - part_start, part_len);
RANGE_START new_length = std::min(new_end - new_start, part_len);

if (new_length > 0) {
item_ranges.emplace_back(initializer->allocate_taint_range(new_start, new_length, range->source));
}
}
}

if (not item_ranges.empty()) {
set_ranges(item.ptr(), item_ranges, tx_map);
some_set = true;
}
offset += part_len;

offset += py::len(item) + separator_increase;
if (!include_separator) {
offset += 1;
}
}

return some_set;
Expand All @@ -389,22 +354,6 @@ api_set_ranges_on_splitted(const StrType& source_str,
return set_ranges_on_splitted(source_str, source_ranges, split_result, tx_map, include_separator);
}

// Resolves a single call parameter.
//
// Lookup order: the positional argument at index `position` if `args` is long
// enough, otherwise the keyword argument named `keyword_name` if present in
// `kwargs`, otherwise `default_value`.
//
// @param position       Zero-based index of the parameter within `args`.
// @param keyword_name   Name under which the parameter may appear in `kwargs`.
// @param default_value  Value returned when the parameter was not passed.
// @param args           Positional arguments of the call.
// @param kwargs         Keyword arguments of the call.
// @return The resolved parameter value.
py::object
parse_params(size_t position,
             const char* keyword_name,
             const py::object& default_value,
             const py::args& args,
             const py::kwargs& kwargs)
{
    if (position + 1 > args.size()) {
        // Not supplied positionally: fall back to the keyword, then the default.
        const bool passed_by_keyword = kwargs && kwargs.contains(keyword_name);
        if (!passed_by_keyword) {
            return default_value;
        }
        return kwargs[keyword_name];
    }
    return args[position];
}

bool
has_pyerr()
{
Expand Down Expand Up @@ -470,17 +419,6 @@ pyexport_aspect_helpers(py::module& m)
"split_result"_a,
// cppcheck-suppress assignBoolToPointer
"include_separator"_a = false);
m.def("_all_as_formatted_evidence",
&all_as_formatted_evidence<py::str>,
"text"_a,
"tag_mapping_function"_a = nullopt,
py::return_value_policy::move);
m.def("_int_as_formatted_evidence",
&int_as_formatted_evidence<py::str>,
"text"_a,
"text_ranges"_a = nullopt,
"tag_mapping_function"_a = nullopt,
py::return_value_policy::move);
m.def("as_formatted_evidence",
&api_as_formatted_evidence<py::bytes>,
"text"_a,
Expand Down
Loading
Loading