Skip to content

Commit

Permalink
Merge branch 'main' into quinna/add-tests-pymongo-patch
Browse files Browse the repository at this point in the history
  • Loading branch information
quinna-h authored Sep 20, 2024
2 parents db43f80 + 33daba9 commit 2b74685
Show file tree
Hide file tree
Showing 62 changed files with 1,627 additions and 420 deletions.
20 changes: 20 additions & 0 deletions .github/workflows/system-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ jobs:
# If ever it's needed, a valid key exists in the repo, using ${{ secrets.DD_API_KEY }}
DD_API_KEY: 1234567890abcdef1234567890abcdef
CMAKE_BUILD_PARALLEL_LEVEL: 12
AWS_ACCESS_KEY_ID: ${{ secrets.IDM_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.IDM_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: us-east-1
AWS_DEFAULT_REGION: us-east-1 # AWS services should use `AWS_REGION`, but some still use the older `AWS_DEFAULT_REGION`
steps:

- name: Checkout system tests
Expand Down Expand Up @@ -132,6 +136,10 @@ jobs:
# If ever it's needed, a valid key exists in the repo, using ${{ secrets.DD_API_KEY }}
DD_API_KEY: 1234567890abcdef1234567890abcdef
CMAKE_BUILD_PARALLEL_LEVEL: 12
AWS_ACCESS_KEY_ID: ${{ secrets.IDM_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.IDM_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: us-east-1
AWS_DEFAULT_REGION: us-east-1 # AWS services should use `AWS_REGION`, but some still use the older `AWS_DEFAULT_REGION`
steps:

- name: Checkout system tests
Expand Down Expand Up @@ -304,3 +312,15 @@ jobs:
with:
name: logs_parametric
path: artifact.tar.gz

# Aggregation job that depends on both `parametric` and `system-tests` and
# mirrors their combined outcome as a single job result.
# NOTE(review): presumably meant to be used as the one required status check — confirm.
finished:
  runs-on: ubuntu-latest
  needs: [parametric, system-tests]
  # Evaluate this job both when the needed jobs succeeded and when one failed.
  if: success() || failure()
  steps:
    # No-op step that only runs when both needed jobs reported 'success'.
    - name: True when everything else succeeded
      if: needs.parametric.result == 'success' && needs.system-tests.result == 'success'
      run: exit 0
    # Forces this job to fail when either needed job did not report 'success'.
    - name: Fails if anything else failed
      if: needs.parametric.result != 'success' || needs.system-tests.result != 'success'
      run: exit 1
2 changes: 1 addition & 1 deletion .gitlab/package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build_base_venvs:
matrix:
- PYTHON_VERSION: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
variables:
CMAKE_BUILD_PARALLEL_LEVEL: 24
CMAKE_BUILD_PARALLEL_LEVEL: 12
PIP_VERBOSE: 1
script:
- pip install riot~=0.19.1
Expand Down
36 changes: 36 additions & 0 deletions benchmarks/appsec_iast_aspects/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,24 @@ aspect_iast_do_index:
warmups: 1
iast_enabled: 1

# Scenario with IAST disabled for `do_index_on_dict` from
# bm.iast_fixtures.str_methods. The mapping is anchored so that another
# scenario can reuse its keys through a YAML merge key.
aspect_no_iast_do_index_on_dict: &aspect_no_iast_do_index_on_dict
  iast_enabled: 0
  processes: 10
  loops: 1
  values: 6
  warmups: 1
  mod_original_name: "bm.iast_fixtures.str_methods"
  function_name: "do_index_on_dict"
  # The dict to index followed by the key to look up.
  # NOTE(review): presumably passed positionally to the function — confirm.
  args: [{'fOobaR': 4, 3: 'foobar'}, 'fOobaR']

# Same scenario with IAST enabled: every key is merged in from the anchored
# `aspect_no_iast_do_index_on_dict` mapping and `iast_enabled` is overridden to 1.
aspect_iast_do_index_on_dict:
  << : *aspect_no_iast_do_index_on_dict
  # These four keys repeat the values already provided by the merged mapping.
  processes: 10
  loops: 1
  values: 6
  warmups: 1
  iast_enabled: 1

aspect_no_iast_do_join: &aspect_no_iast_do_join
iast_enabled: 0
processes: 10
Expand Down Expand Up @@ -377,6 +395,24 @@ aspect_iast_do_partition:
warmups: 1
iast_enabled: 1

# Scenario with IAST disabled for `do_re_match_index` from
# bm.iast_fixtures.str_methods. The mapping is anchored so that another
# scenario can reuse its keys through a YAML merge key.
aspect_no_iast_do_re_match_index: &aspect_no_iast_do_re_match_index
  iast_enabled: 0
  processes: 10
  loops: 1
  values: 6
  warmups: 1
  mod_original_name: "bm.iast_fixtures.str_methods"
  function_name: "do_re_match_index"
  # Argument order follows the function signature (text, regexp, index).
  # Backslashes are literal inside single-quoted YAML scalars, so the pattern
  # uses single backslashes; `\\w` would match a literal backslash followed by
  # "w" characters, never match 'foo bar', and make the function fail on a
  # `None` match.
  args: ['foo bar', '(\w+) (\w+)', 1]

# Same scenario with IAST enabled: every key is merged in from the anchored
# `aspect_no_iast_do_re_match_index` mapping and `iast_enabled` is overridden to 1.
aspect_iast_do_re_match_index:
  << : *aspect_no_iast_do_re_match_index
  # These four keys repeat the values already provided by the merged mapping.
  processes: 10
  loops: 1
  values: 6
  warmups: 1
  iast_enabled: 1

aspect_no_iast_do_re_sub: &aspect_no_iast_do_re_sub
iast_enabled: 0
processes: 10
Expand Down
9 changes: 9 additions & 0 deletions benchmarks/bm/iast_fixtures/str_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -1064,6 +1064,10 @@ def do_index(c: str, i: int) -> Text:
return c[i]


def do_index_on_dict(d: dict, k):
    """Return the value stored under key ``k`` in ``d``.

    Propagates whatever ``d[k]`` raises (``KeyError`` for a plain ``dict``
    when the key is absent).
    """
    # NOTE(review): presumably the bare subscript is the exact operation the
    # index-aspect benchmark is meant to exercise, so keep it as-is — confirm.
    return d[k]


def do_methodcaller(s, func, *args):
func_method = operator.methodcaller(func, *args)
return func_method(s)
Expand Down Expand Up @@ -1236,3 +1240,8 @@ def index_lower_add(url):
def urlib_urlsplit(text):
results = urllib.parse.urlsplit(text)
return results


def do_re_match_index(text, regexp, index):
    """Search ``text`` for ``regexp`` and return group ``index`` of the match.

    Mind the argument order: the string being searched comes first and the
    pattern second. ``re.search`` returns ``None`` when nothing matches, in
    which case the subscript below raises ``TypeError``.
    """
    match = re.search(regexp, text)
    # Subscripting a match object returns the same value as match.group(index).
    return match[index]
2 changes: 2 additions & 0 deletions benchmarks/bm/iast_utils/aspects_benchmarks_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@
"do_format_with_named_parameter": ["foobar{key}", "baz"],
"do_format_with_positional_parameter": ["foobar{}", "baz"],
"do_index": ["foobar", 3],
"do_index_on_dict": [{"fOobaR": 4, 3: "foobar"}, "fOobaR"],
"do_join": ["foobar", ["baz", "pok"]],
"do_join_args_kwargs": ["foobar", ["baz", "pok"]],
"do_ljust": ["foobar", 2],
Expand All @@ -167,6 +168,7 @@
"do_partition": ["foobar", "o"],
"do_re_sub": ["foobar", "o", "a", 1],
"do_re_subn": ["foobar", "o", "a", 1],
"do_re_match_index": [r"(\w+) (\w+)", "foo bar", 1],
"do_replace": ["foobar", "o", "a", 1],
"do_rplit_separator_and_maxsplit": ["foobar", "o", 1],
"do_rsplit": ["foo bar baz", " ", 1],
Expand Down
50 changes: 50 additions & 0 deletions ddtrace/_trace/_span_pointer.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
from enum import Enum
from hashlib import sha256
import random
from typing import Any
from typing import Dict
from typing import NamedTuple
from typing import Optional

from ddtrace._trace._span_link import SpanLink
from ddtrace._trace._span_link import SpanLinkKind
from ddtrace.internal.logger import get_logger


log = get_logger(__name__)


_SPAN_POINTER_SPAN_LINK_TRACE_ID = 0
Expand All @@ -16,6 +23,18 @@ class _SpanPointerDirection(Enum):
DOWNSTREAM = "d"


class _SpanPointerDescription(NamedTuple):
    # Not to be confused with _SpanPointer. This class describes the parameters
    # required to attach a span pointer to a Span. It lets us decouple code
    # that calculates span pointers from code that actually attaches them to
    # the right Span.

    # Name of the kind of entity being pointed at.
    pointer_kind: str
    # Direction of the pointer, as a _SpanPointerDirection member.
    pointer_direction: _SpanPointerDirection
    # Hash string identifying the pointed-at entity.
    pointer_hash: str
    # Additional key/value data carried with the description.
    # NOTE(review): presumably merged into the resulting span link's attributes — confirm.
    extra_attributes: Dict[str, Any]


class _SpanPointer(SpanLink):
def __init__(
self,
Expand All @@ -40,3 +59,34 @@ def __init__(
def __post_init__(self):
# Do not want to do the trace_id and span_id checks that SpanLink does.
pass


_STANDARD_HASHING_FUNCTION_FAILURE_PREFIX = "HashingFailure"


def _standard_hashing_function(*elements: bytes) -> str:
try:
if not elements:
raise ValueError("elements must not be empty")

# Please see the tests for more details about this logic.
return sha256(b"|".join(elements)).hexdigest()[:32]

except Exception as e:
log.warning(
"failed to generate standard hash for span pointer: %s",
str(e),
)
return _add_random_suffix(
prefix=_STANDARD_HASHING_FUNCTION_FAILURE_PREFIX,
minimum_length=32,
)


def _add_random_suffix(*, prefix: str, minimum_length: int) -> str:
if len(prefix) >= minimum_length:
return prefix

suffix = "".join(random.choice("0123456789abcdef") for _ in range(minimum_length - len(prefix))) # nosec

return prefix + suffix
Empty file.
101 changes: 101 additions & 0 deletions ddtrace/_trace/utils_botocore/span_pointers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
from typing import Any
from typing import Dict
from typing import List

from ddtrace._trace._span_pointer import _SpanPointerDescription
from ddtrace._trace._span_pointer import _SpanPointerDirection
from ddtrace._trace._span_pointer import _standard_hashing_function
from ddtrace.internal.logger import get_logger


log = get_logger(__name__)


def extract_span_pointers_from_successful_botocore_response(
    endpoint_name: str,
    operation_name: str,
    request_parameters: Dict[str, Any],
    response: Dict[str, Any],
) -> List[_SpanPointerDescription]:
    """Return the span pointer descriptions for a successful botocore call.

    Only the ``"s3"`` endpoint is handled; any other endpoint yields an empty
    list.
    """
    if endpoint_name != "s3":
        return []

    return _extract_span_pointers_for_s3_response(operation_name, request_parameters, response)


def _extract_span_pointers_for_s3_response(
    operation_name: str,
    request_parameters: Dict[str, Any],
    response: Dict[str, Any],
) -> List[_SpanPointerDescription]:
    """Return the span pointer descriptions for a successful S3 operation.

    Only ``"PutObject"`` is handled; any other operation yields an empty list.
    """
    if operation_name != "PutObject":
        return []

    return _extract_span_pointers_for_s3_put_object_response(request_parameters, response)


def _extract_span_pointers_for_s3_put_object_response(
    request_parameters: Dict[str, Any],
    response: Dict[str, Any],
) -> List[_SpanPointerDescription]:
    """Build the span pointer description for a successful S3 PutObject call.

    Reads ``Bucket`` and ``Key`` from the request parameters and ``ETag`` from
    the response, strips the double quotes surrounding the ETag, and returns a
    single downstream ``aws.s3.object`` pointer description.

    Any failure is logged as a warning and results in an empty list: a missing
    field, an ETag that is not a string, or an error while hashing.
    """
    # Endpoint Reference:
    # https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html

    try:
        bucket = request_parameters["Bucket"]
        key = request_parameters["Key"]
        etag = response["ETag"]

    except KeyError as e:
        log.warning(
            "missing a parameter or response field required to make span pointer for S3.PutObject: %s",
            str(e),
        )
        return []

    try:
        # The ETag is surrounded by double quotes for some reason. The
        # stripping is done inside this broad handler so that an ETag of an
        # unexpected type (e.g. None) is logged instead of raising out of here.
        if etag.startswith('"') and etag.endswith('"'):
            etag = etag[1:-1]

        return [
            _aws_s3_object_span_pointer_description(
                pointer_direction=_SpanPointerDirection.DOWNSTREAM,
                bucket=bucket,
                key=key,
                etag=etag,
            )
        ]
    except Exception as e:
        log.warning(
            "failed to generate S3.PutObject span pointer: %s",
            str(e),
        )
        return []


def _aws_s3_object_span_pointer_description(
    pointer_direction: _SpanPointerDirection,
    bucket: str,
    key: str,
    etag: str,
) -> _SpanPointerDescription:
    """Describe a span pointer to an S3 object.

    The pointer kind is always ``"aws.s3.object"``, the hash is derived from
    the bucket, key and ETag, and no extra attributes are attached.
    """
    object_hash = _aws_s3_object_span_pointer_hash(bucket, key, etag)

    return _SpanPointerDescription(
        pointer_kind="aws.s3.object",
        pointer_direction=pointer_direction,
        pointer_hash=object_hash,
        extra_attributes={},
    )


def _aws_s3_object_span_pointer_hash(bucket: str, key: str, etag: str) -> str:
if '"' in etag:
# Some AWS API endpoints put the ETag in double quotes. We expect the
# calling code to have correctly fixed this already.
raise ValueError(f"ETag should not have double quotes: {etag}")

return _standard_hashing_function(
bucket.encode("ascii"),
key.encode("utf-8"),
etag.encode("ascii"),
)
54 changes: 36 additions & 18 deletions ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectIndex.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "AspectIndex.h"
#include "Helpers.h"
#include "Utils/StringUtils.h"

/**
* @brief Index aspect
Expand All @@ -13,17 +14,36 @@
PyObject*
index_aspect(PyObject* result_o, PyObject* candidate_text, PyObject* idx, const TaintRangeMapTypePtr& tx_taint_map)
{
const auto idx_long = PyLong_AsLong(idx);
TaintRangeRefs ranges_to_set;
auto [ranges, ranges_error] = get_ranges(candidate_text, tx_taint_map);
if (ranges_error) {
return result_o;
}
for (const auto& current_range : ranges) {
if (current_range->start <= idx_long and idx_long < (current_range->start + current_range->length)) {
ranges_to_set.emplace_back(initializer->allocate_taint_range(0l, 1l, current_range->source));
break;

if (is_text(candidate_text)) {
for (const auto& current_range : ranges) {
const auto idx_long = PyLong_AsLong(idx);
if (current_range->start <= idx_long and idx_long < (current_range->start + current_range->length)) {
ranges_to_set.emplace_back(initializer->allocate_taint_range(0l, 1l, current_range->source));
break;
}
}

} else if (PyReMatch_Check(candidate_text)) { // For re.Match objects, taint the whole output
try {
const size_t& len_result_o{ get_pyobject_size(result_o) };
const auto& current_range = ranges.at(0);
ranges_to_set.emplace_back(initializer->allocate_taint_range(0l, len_result_o, current_range->source));
} catch (const std::out_of_range& ex) {
if (nullptr == result_o) {
throw py::index_error();
}
// No ranges found, return original object
return result_o;
}
} else {
// Other stuff
return result_o;
}

const auto& res_new_id = new_pyobject_id(result_o);
Expand All @@ -41,29 +61,27 @@ PyObject*
api_index_aspect(PyObject* self, PyObject* const* args, const Py_ssize_t nargs)
{
if (nargs != 2) {
py::set_error(PyExc_ValueError, MSG_ERROR_N_PARAMS);
iast_taint_log_error(MSG_ERROR_N_PARAMS);
py::set_error(PyExc_ValueError, MSG_ERROR_N_PARAMS);
return nullptr;
}

PyObject* candidate_text = args[0];
PyObject* idx = args[1];
auto result_o = PyObject_GetItem(candidate_text, idx);
if (!is_text(candidate_text) or !is_some_number(idx)) {
return result_o;
}
const auto result_o = PyObject_GetItem(candidate_text, idx);
TRY_CATCH_ASPECT("index_aspect", return result_o, , {
const auto ctx_map = Initializer::get_tainting_map();
if (not ctx_map or ctx_map->empty()) {
if (const auto error = has_pyerr_as_string(); !error.empty()) {
iast_taint_log_error(error);
return nullptr;
}

if ((!is_text(candidate_text) or !is_some_number(idx)) and !PyReMatch_Check(candidate_text)) {
return result_o;
}

auto error_str = has_pyerr_as_string();
if (!error_str.empty()) {
error_str += " (native index_aspect)";
iast_taint_log_error(error_str);
py::set_error(PyExc_IndexError, error_str.c_str());
return nullptr;
const auto ctx_map = Initializer::get_tainting_map();
if (not ctx_map or ctx_map->empty()) {
return result_o;
}

return index_aspect(result_o, candidate_text, idx, ctx_map);
Expand Down
Loading

0 comments on commit 2b74685

Please sign in to comment.