Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(iast): bytesio, stringio and read aspects #10686

Merged
merged 16 commits into from
Sep 24, 2024
Merged
18 changes: 18 additions & 0 deletions ddtrace/appsec/_common_module_patches.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
def patch_common_modules():
try_wrap_function_wrapper("builtins", "open", wrapped_open_CFDDB7ABBA9081B6)
try_wrap_function_wrapper("urllib.request", "OpenerDirector.open", wrapped_open_ED4CF71136E15EBF)
try_wrap_function_wrapper("_io", "BytesIO.read", wrapped_read_F3E51D71B4EC16EF)
try_wrap_function_wrapper("_io", "StringIO.read", wrapped_read_F3E51D71B4EC16EF)
try_wrap_function_wrapper("os", "system", wrapped_system_5542593D237084A7)
core.on("asm.block.dbapi.execute", execute_4C9BAC8E228EB347)
if asm_config._iast_enabled:
Expand All @@ -39,6 +41,22 @@ def patch_common_modules():
def unpatch_common_modules():
    """Call ``try_unwrap`` on ``open``, ``OpenerDirector.open`` and the ``_io`` ``read`` methods."""
    targets = (
        ("builtins", "open"),
        ("urllib.request", "OpenerDirector.open"),
        ("_io", "BytesIO.read"),
        ("_io", "StringIO.read"),
    )
    # Same order as the original sequence of calls.
    for module_name, attribute_path in targets:
        try_unwrap(module_name, attribute_path)


def wrapped_read_F3E51D71B4EC16EF(original_read_callable, instance, args, kwargs):
    """
    Wrapper for the ``read`` method of ``_io.BytesIO`` and ``_io.StringIO``.

    The real ``read`` is always executed first. When IAST is enabled and the
    stream object (``instance``) is reported as tainted, its ranges are passed
    on to the value that was read via ``copy_and_shift_ranges_from_strings``
    with an offset of 0.

    :param original_read_callable: the wrapped ``read`` callable.
    :param instance: the stream object ``read`` was invoked on.
    :param args: positional arguments of the ``read`` call.
    :param kwargs: keyword arguments of the ``read`` call.
    :return: the value returned by the real ``read``.
    """
    data = original_read_callable(*args, **kwargs)
    if not asm_config._iast_enabled:
        return data

    # Imported on the IAST-enabled path only, not at module level.
    from ddtrace.appsec._iast._taint_tracking import copy_and_shift_ranges_from_strings
    from ddtrace.appsec._iast._taint_tracking import is_pyobject_tainted

    if is_pyobject_tainted(instance):
        copy_and_shift_ranges_from_strings(instance, data, 0)
    return data


def wrapped_open_CFDDB7ABBA9081B6(original_open_callable, instance, args, kwargs):
Expand Down
5 changes: 4 additions & 1 deletion ddtrace/appsec/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
from re import Match
import sys

from _io import BytesIO
from _io import StringIO


if sys.version_info >= (3, 8):
from typing import Literal # noqa:F401
Expand Down Expand Up @@ -122,7 +125,7 @@ class IAST(metaclass=Constant_Class):
SEP_MODULES: Literal[","] = ","
REQUEST_IAST_ENABLED: Literal["_dd.iast.request_enabled"] = "_dd.iast.request_enabled"
TEXT_TYPES = (str, bytes, bytearray)
TAINTEABLE_TYPES = (str, bytes, bytearray, Match)
TAINTEABLE_TYPES = (str, bytes, bytearray, Match, BytesIO, StringIO)


class IAST_SPAN_TAGS(metaclass=Constant_Class):
Expand Down
1 change: 0 additions & 1 deletion ddtrace/appsec/_iast/_ast/ast_patching.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,6 @@
"cattrs.",
"ddsketch.",
"ddtrace.",
"encodings.", # this package is used to load encodings when a module is imported, propagation is not needed
"envier.",
"exceptiongroup.",
"freezegun.", # Testing utilities for time manipulation
Expand Down
4 changes: 4 additions & 0 deletions ddtrace/appsec/_iast/_ast/visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,16 @@ def _mark_avoid_convert_recursively(node):
"definitions_module": "ddtrace.appsec._iast._taint_tracking.aspects",
"alias_module": "ddtrace_aspects",
"functions": {
"StringIO": "ddtrace_aspects.stringio_aspect",
"BytesIO": "ddtrace_aspects.bytesio_aspect",
"str": "ddtrace_aspects.str_aspect",
"bytes": "ddtrace_aspects.bytes_aspect",
"bytearray": "ddtrace_aspects.bytearray_aspect",
"ddtrace_iast_flask_patch": "ddtrace_aspects.empty_func", # To avoid recursion
},
"stringalike_methods": {
"StringIO": "ddtrace_aspects.stringio_aspect",
"BytesIO": "ddtrace_aspects.bytesio_aspect",
"decode": "ddtrace_aspects.decode_aspect",
"join": "ddtrace_aspects.join_aspect",
"encode": "ddtrace_aspects.encode_aspect",
Expand Down
8 changes: 7 additions & 1 deletion ddtrace/appsec/_iast/_taint_tracking/Utils/StringUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ get_unique_id(const PyObject* str)
return reinterpret_cast<uintptr_t>(str);
}

// Return true when `obj` is an instance of Python's `_io._IOBase`.
// The `_io` module is imported and its `_IOBase` attribute is looked up on
// every call.
// NOTE(review): this accepts any `_IOBase` instance, while the Python-side
// TAINTEABLE_TYPES tuple lists only BytesIO and StringIO -- confirm the
// difference is intended.
static bool
PyIOBase_Check(const PyObject* obj)
{
    // The C-style cast drops the const qualifier before the pointer is handed to pybind11.
    return py::isinstance((PyObject*)obj, py::module_::import("_io").attr("_IOBase"));
}

static bool
PyReMatch_Check(const PyObject* obj)
{
Expand All @@ -53,7 +59,7 @@ is_text(const PyObject* pyptr)
inline bool
is_tainteable(const PyObject* pyptr)
{
return pyptr != nullptr and (is_text(pyptr) or PyReMatch_Check(pyptr));
return pyptr != nullptr and (is_text(pyptr) or PyReMatch_Check(pyptr) or PyIOBase_Check(pyptr));
}

// Base function for the variadic template
Expand Down
6 changes: 4 additions & 2 deletions ddtrace/appsec/_iast/_taint_tracking/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from io import BytesIO
from io import StringIO
import os
from typing import Any
from typing import Tuple
Expand Down Expand Up @@ -220,9 +222,9 @@ def trace_calls_and_returns(frame, event, arg):
if frame in TAINTED_FRAMES:
TAINTED_FRAMES.remove(frame)
log.debug("Return from %s on line %d of %s, return value: %s", func_name, line_no, filename, arg)
if isinstance(arg, (str, bytes, bytearray, list, tuple, dict)):
if isinstance(arg, (str, bytes, bytearray, BytesIO, StringIO, list, tuple, dict)):
if (
(isinstance(arg, (str, bytes, bytearray)) and is_pyobject_tainted(arg))
(isinstance(arg, (str, bytes, bytearray, BytesIO, StringIO)) and is_pyobject_tainted(arg))
or (isinstance(arg, (list, tuple)) and any([is_pyobject_tainted(x) for x in arg]))
or (isinstance(arg, dict) and any([is_pyobject_tainted(x) for x in arg.values()]))
):
Expand Down
40 changes: 40 additions & 0 deletions ddtrace/appsec/_iast/_taint_tracking/aspects.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
from typing import Tuple
from typing import Union

import _io

from ddtrace.appsec._constants import IAST

from .._taint_tracking import TagMappingMode
Expand Down Expand Up @@ -94,9 +96,47 @@
"ospathsplitext_aspect",
"ospathsplitdrive_aspect",
"ospathsplitroot_aspect",
"bytesio_aspect",
"stringio_aspect",
]


def stringio_aspect(orig_function: Optional[Callable], flag_added_args: int, *args: Any, **kwargs: Any) -> _io.StringIO:
juanjux marked this conversation as resolved.
Show resolved Hide resolved
if orig_function is not None:
if flag_added_args > 0:
args = args[flag_added_args:]
result = orig_function(*args, **kwargs)
else:
if flag_added_args > 0:
args = args[flag_added_args:]
result = _io.StringIO(*args, **kwargs)

if args and is_pyobject_tainted(args[0]) and isinstance(result, _io.StringIO):
try:
copy_and_shift_ranges_from_strings(args[0], result, 0)
except Exception as e:
iast_taint_log_error("IAST propagation error. stringio_aspect. {}".format(e))
return result


def bytesio_aspect(orig_function: Optional[Callable], flag_added_args: int, *args: Any, **kwargs: Any) -> _io.BytesIO:
juanjux marked this conversation as resolved.
Show resolved Hide resolved
if orig_function is not None:
if flag_added_args > 0:
args = args[flag_added_args:]
result = orig_function(*args, **kwargs)
else:
if flag_added_args > 0:
args = args[flag_added_args:]
result = _io.BytesIO(*args, **kwargs)

if args and is_pyobject_tainted(args[0]) and isinstance(result, _io.BytesIO):
try:
copy_and_shift_ranges_from_strings(args[0], result, 0)
except Exception as e:
iast_taint_log_error("IAST propagation error. bytesio_aspect. {}".format(e))
return result


def str_aspect(orig_function: Optional[Callable], flag_added_args: int, *args: Any, **kwargs: Any) -> str:
if orig_function is not None:
if orig_function != builtin_str:
Expand Down
63 changes: 63 additions & 0 deletions tests/appsec/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,69 @@ def iast_ast_patching_import_error():
return Response(str(module_with_import_errors.verbal_kint_is_keyser_soze))


@app.route("/iast-ast-patching-io-bytesio", methods=["GET"])
def iast_ast_patching_io_bytes_io():
    # Builds a BytesIO from the encoded ``filename`` query parameter and answers
    # "OK" when ``is_pyobject_tainted`` reports the stream as tainted, "Fail"
    # otherwise (including when the check itself raises).
    filename = request.args.get("filename")
    style = request.args.get("style")
    bytes_filename = filename.encode()
    # Each ``style`` value selects a different import/call form for BytesIO;
    # the forms are intentionally kept distinct.
    if style == "_io_module":
        import _io

        changed = _io.BytesIO(bytes_filename)
    elif style == "io_module":
        import io

        changed = io.BytesIO(bytes_filename)
    elif style == "io_function":
        from io import BytesIO

        changed = BytesIO(bytes_filename)
    else:
        # Any other style value falls back to the ``from _io import`` form.
        from _io import BytesIO

        changed = BytesIO(bytes_filename)
    resp = Response("Fail")
    try:
        from ddtrace.appsec._iast._taint_tracking import is_pyobject_tainted

        if is_pyobject_tainted(changed):
            resp = Response("OK")
    except Exception as e:
        # Best effort: the error is printed and the "Fail" response is returned.
        print(e)
    return resp


@app.route("/iast-ast-patching-io-stringio", methods=["GET"])
def iast_ast_patching_io_string_io():
    # Builds a StringIO from the ``filename`` query parameter and answers "OK"
    # when ``is_pyobject_tainted`` reports the stream as tainted, "Fail"
    # otherwise (including when the check itself raises).
    filename = request.args.get("filename")
    style = request.args.get("style")
    # Each ``style`` value selects a different import/call form for StringIO;
    # the forms are intentionally kept distinct.
    if style == "_io_module":
        import _io

        changed = _io.StringIO(filename)
    elif style == "io_module":
        import io

        changed = io.StringIO(filename)
    elif style == "io_function":
        from io import StringIO

        changed = StringIO(filename)
    else:
        # Any other style value falls back to the ``from _io import`` form.
        from _io import StringIO

        changed = StringIO(filename)
    resp = Response("Fail")
    try:
        from ddtrace.appsec._iast._taint_tracking import is_pyobject_tainted

        if is_pyobject_tainted(changed):
            resp = Response("OK")
    except Exception as e:
        # Best effort: the error is printed and the "Fail" response is returned.
        print(e)
    return resp


@app.route("/iast-ast-patching-re-sub", methods=["GET"])
def iast_ast_patching_re_sub():
filename = request.args.get("filename")
Expand Down
36 changes: 36 additions & 0 deletions tests/appsec/iast/_ast/test_ast_patching.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,3 +172,39 @@ def test_module_path_none(caplog):
with caplog.at_level(logging.DEBUG), mock.patch("ddtrace.internal.module.Path.resolve", side_effect=AttributeError):
assert ("", "") == astpatch_module(__import__("tests.appsec.iast.fixtures.ast.str.class_str", fromlist=[None]))
assert "astpatch_source couldn't find the module: tests.appsec.iast.fixtures.ast.str.class_str" in caplog.text


@pytest.mark.parametrize(
    "module_name",
    [
        "tests.appsec.iast.fixtures.ast.io.module_stringio",
        "tests.appsec.iast.fixtures.ast.io.function_stringio",
    ],
)
def test_astpatch_stringio_module_changed(module_name):
    """Patching the StringIO fixtures must add the ddtrace imports and a ``stringio_aspect`` call."""
    fixture_module = __import__(module_name, fromlist=[None])
    module_path, patched_ast = astpatch_module(fixture_module)
    assert (module_path, patched_ast) != ("", "")

    expected_header = (
        "\nimport ddtrace.appsec._iast.taint_sinks as ddtrace_taint_sinks"
        "\nimport ddtrace.appsec._iast._taint_tracking.aspects as ddtrace_aspects"
    )
    generated = astunparse.unparse(patched_ast)
    assert generated.startswith(expected_header)
    assert "ddtrace_aspects.stringio_aspect(" in generated


@pytest.mark.parametrize(
    "module_name",
    [
        "tests.appsec.iast.fixtures.ast.io.module_bytesio",
        "tests.appsec.iast.fixtures.ast.io.function_bytesio",
    ],
)
def test_astpatch_bytesio_module_changed(module_name):
    """Patching the BytesIO fixtures must add the ddtrace imports and a ``bytesio_aspect`` call."""
    fixture_module = __import__(module_name, fromlist=[None])
    module_path, patched_ast = astpatch_module(fixture_module)
    assert (module_path, patched_ast) != ("", "")

    expected_header = (
        "\nimport ddtrace.appsec._iast.taint_sinks as ddtrace_taint_sinks"
        "\nimport ddtrace.appsec._iast._taint_tracking.aspects as ddtrace_aspects"
    )
    generated = astunparse.unparse(patched_ast)
    assert generated.startswith(expected_header)
    assert "ddtrace_aspects.bytesio_aspect(" in generated
6 changes: 6 additions & 0 deletions tests/appsec/iast/fixtures/ast/io/function_bytesio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env python3
from _io import BytesIO


# AST-patching fixture: returns a BytesIO built through the bare-name call
# form (``from _io import BytesIO``); keep the call syntax as is.
def fixture_function_bytesio():
    return BytesIO(b"test")
6 changes: 6 additions & 0 deletions tests/appsec/iast/fixtures/ast/io/function_stringio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env python3
from _io import StringIO


# AST-patching fixture: returns a StringIO built through the bare-name call
# form (``from _io import StringIO``); keep the call syntax as is.
def fixture_function_stringio():
    return StringIO("test")
6 changes: 6 additions & 0 deletions tests/appsec/iast/fixtures/ast/io/module_bytesio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env python3
import _io


# AST-patching fixture: returns a BytesIO built through the module-attribute
# call form (``_io.BytesIO``); keep the call syntax as is.
def fixture_function_bytesio():
    return _io.BytesIO(b"test")
8 changes: 8 additions & 0 deletions tests/appsec/iast/fixtures/ast/io/module_read.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env python3


def read_from_io(my_object):
    # Calls ``read(5)`` on the given object; the value read is discarded and
    # the function returns None.
    try:
        my_object.read(5)
    except AttributeError as e:
        # Any AttributeError (a missing ``read`` attribute, or one raised inside
        # ``read`` itself) is replaced by a fixed message, chained to the original.
        raise AttributeError("Object does not have a read method") from e
6 changes: 6 additions & 0 deletions tests/appsec/iast/fixtures/ast/io/module_stringio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env python3
import _io


# AST-patching fixture: returns a StringIO built through the module-attribute
# call form (``_io.StringIO``); keep the call syntax as is.
def fixture_function_stringio():
    return _io.StringIO("test")
4 changes: 3 additions & 1 deletion tests/appsec/iast/fixtures/propagation_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,4 +181,6 @@ def propagation_memory_check(origin_string1, tainted_string_2):
_ = m.read()
except Exception:
pass
return string23
import _io

return _io.StringIO(string23).read()
24 changes: 24 additions & 0 deletions tests/appsec/integrations/test_flask_iast_patching.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,27 @@ def test_flask_iast_ast_patching_re(style, endpoint, function):

assert response.status_code == 200
assert response.content == b"OK"


@pytest.mark.parametrize("style", ["_io_module", "io_module", "io_function", "_io_function"])
@pytest.mark.parametrize("function", ["bytesio", "stringio"])
def test_flask_iast_ast_patching_io(style, function, endpoint="io"):
    """
    End-to-end check of _io/io BytesIO and StringIO patching.

    Starts the Flask test server with IAST enabled, requests the route for the
    given ``function`` and import ``style``, and expects a 200 response whose
    body is ``OK``.
    """
    filename = "path_traversal_test_file.txt"
    url = f"/iast-ast-patching-{endpoint}-{function}?style={style}&filename={filename}"
    server = flask_server(appsec_enabled="false", iast_enabled="true", token=None, port=8020, assert_debug=False)
    with server as context:
        _, client, _pid = context

        response = client.get(url)

        assert response.status_code == 200
        assert response.content == b"OK"
Loading