Skip to content

Commit

Permalink
Transform START STOP timestamps to epoch time (#63)
Browse files Browse the repository at this point in the history
  • Loading branch information
delliott90 authored Nov 1, 2018
1 parent 329eacc commit 21f742a
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 37 deletions.
93 changes: 72 additions & 21 deletions stix_shifter/src/modules/qradar/aql_query_constructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
CombinedComparisonExpression, CombinedObservationExpression, ObservationOperators
from stix_shifter.src.patterns.errors import SearchFeatureNotSupportedError

from stix_shifter.src.transformers import TimestampToEpoch, ValueTransformer
from stix_shifter.src.transformers import TimestampToMilliseconds, ValueTransformer


def _fetch_network_protocol_mapping():
Expand Down Expand Up @@ -49,23 +49,10 @@ def __init__(self, pattern: Pattern, data_model_mapper, result_limit):
self.result_limit = result_limit
self.translated = self.parse_expression(pattern)

# Split WHERE statements having a START STOP qualifier: AQL only supports one START STOP qualifier per query.
query_split = self.translated.split("SPLIT")
if len(query_split) > 1:
query_array = query_split
# removing leading AND/OR
query_array = list(map(lambda x: re.sub("^\s?(OR|AND)\s?", "", x), query_array))
# removing trailing AND/OR
query_array = list(map(lambda x: re.sub("\s?(OR|AND)\s?$", "", x), query_array))
# remove empty strings in the array
query_array = list(map(lambda x: x.strip(), list(filter(None, query_array))))
# transform time format from t'2014-04-25T15:51:20Z' into '2014-04-25 15:51:20'
big_t_pattern = "((?<=STARTt'\d{4}-\d{2}-\d{2})(T))|((?<=STOPt'\d{4}-\d{2}-\d{2})(T))"
query_array = list(map(lambda x: re.sub(big_t_pattern, " ", x), query_array))
big_z_pattern = "(?<=\d{2}:\d{2}:\d{2})Z"
query_array = list(map(lambda x: re.sub(big_z_pattern, "", x), query_array))
little_t_pattern = "(?<=START)t|(?<=STOP)t"
query_array = list(map(lambda x: re.sub(little_t_pattern, "", x), query_array))
self.queries = query_array
self.queries = _format_split_queries(query_split)
else:
self.queries = query_split

Expand Down Expand Up @@ -125,7 +112,7 @@ def _parse_expression(self, expression, qualifier=None) -> str:
raise KeyError(
"Network protocol {} is not supported.".format(protocol_key))
elif stix_field == 'start' or stix_field == 'end':
transformer = TimestampToEpoch()
transformer = TimestampToMilliseconds()
expression.value = transformer.transform(expression.value)

# Some values are formatted differently based on how they're being compared
Expand Down Expand Up @@ -154,7 +141,7 @@ def _parse_expression(self, expression, qualifier=None) -> str:
if mapped_field == 'domainname' and comparator != ComparisonComparators.Like:
comparator = self.comparator_lookup[ComparisonComparators.Like]
value = self._format_like(expression.value)

comparison_string += "{mapped_field} {comparator} {value}".format(
mapped_field=mapped_field, comparator=comparator, value=value)

Expand Down Expand Up @@ -211,18 +198,82 @@ def parse_expression(self, pattern: Pattern):
return self._parse_expression(pattern)


def _test_for_start_stop(query_string) -> bool:
pattern = "START'\d{4}(-\d{2}){2}\s\d{2}(:\d{2}){2}(\.\d+)?Z?'\s?STOP"
def _test_or_add_milliseconds(timestamp) -> str:
    """Validate a quoted timestamp and normalize it to three fractional digits.

    The single quotes are removed from the timestamp. If it already ends in
    exactly three fractional digits followed by ``Z`` it is returned as is.
    Any other fraction (or a missing fraction) is replaced with ``.000``,
    i.e. partial fractions are discarded, not scaled.

    :param timestamp: quoted timestamp such as ``'2016-06-01T01:30:00Z'``
    :return: unquoted timestamp ending in ``.mmmZ``
    :raises ValueError: if ``_test_timestamp`` rejects the input
    """
    if not _test_timestamp(timestamp):
        raise ValueError("Invalid timestamp")
    # remove single quotes around timestamp
    timestamp = timestamp.replace("'", "")
    # already carries 3-decimal milliseconds: nothing to change
    if re.search(r"\.\d{3}Z$", timestamp):
        return timestamp
    # raw strings keep the regex escapes out of Python's own escape handling
    return re.sub(r"(\.\d+Z$)|(Z$)", ".000Z", timestamp)


def _test_START_STOP_format(query_string) -> bool:
    """Report whether ``query_string`` contains a START ... STOP qualifier.

    Two layouts are recognized:
      * ``STARTt'1234-56-78T00:00:00.123Z'STOPt'1234-56-78T00:00:00.123Z'``
      * ``START 1234567890123 STOP 1234567890123``

    Only the START value and the presence of the STOP keyword right after it
    are checked; the value following STOP is not validated here.

    :param query_string: translated query text to inspect
    :return: True when the qualifier is found anywhere in the string
    """
    # Raw string: "\d", "\s" and "\." are regex escapes, not Python escapes.
    pattern = r"START((t'\d{4}(-\d{2}){2}T\d{2}(:\d{2}){2}(\.\d+)?Z')|(\s\d{13}\s))STOP"
    return bool(re.search(pattern, query_string))


def _test_timestamp(timestamp) -> bool:
    """Report whether ``timestamp`` is a single-quoted UTC timestamp.

    The accepted form is ``'YYYY-MM-DDThh:mm:ss[.fraction]Z'`` including the
    surrounding single quotes; the fraction may have any number of digits.

    :param timestamp: candidate string
    :return: True only when the whole string has the accepted form
    """
    # fullmatch (rather than search with ^...$) also rejects a trailing
    # newline, which "$" alone would let through.
    pattern = r"'\d{4}(-\d{2}){2}T\d{2}(:\d{2}){2}(\.\d+)?Z'"
    return bool(re.fullmatch(pattern, timestamp))


def _convert_timestamps_to_milliseconds(query_parts):
    """Rebuild a START/STOP query with both timestamps as millisecond values.

    ``query_parts`` is read by position: index 0 is the text before the
    qualifier, 1 and 3 are the qualifier keywords, 2 and 4 are the start and
    stop timestamps. The pieces are joined back together with single spaces.

    :param query_parts: five-element sequence of strings
    :return: the reassembled query string
    """
    # Normalize both timestamps first so an invalid one is reported before
    # any conversion is attempted.
    normalized = [_test_or_add_milliseconds(query_parts[index]) for index in (2, 4)]
    to_millis = TimestampToMilliseconds().transform
    start_ms, stop_ms = [to_millis(stamp) for stamp in normalized]
    return " ".join((
        query_parts[0],
        query_parts[1],
        str(start_ms),
        query_parts[3],
        str(stop_ms),
    ))


def _format_split_queries(query_array):
    """Clean up split WHERE fragments and convert START/STOP timestamps.

    Each fragment loses one leading and one trailing AND/OR and is stripped
    of surrounding whitespace; fragments that end up empty are dropped.
    Fragments carrying a START ... STOP qualifier have their timestamps
    converted to 13-digit millisecond time, e.g.
    ``START t'2014-04-25T15:51:20.000Z'`` becomes ``START 1398441080000``.
    A qualified fragment that does not split into exactly five parts is
    logged and omitted from the result.

    :param query_array: list of query fragments (strings)
    :return: list of formatted query strings
    """
    cleaned_queries = []
    for fragment in query_array:
        # removing leading AND/OR
        fragment = re.sub(r"^\s?(OR|AND)\s?", "", fragment)
        # removing trailing AND/OR
        fragment = re.sub(r"\s?(OR|AND)\s?$", "", fragment)
        # Strip before testing for emptiness so whitespace-only fragments
        # are dropped instead of surviving as empty queries.
        fragment = fragment.strip()
        if fragment:
            cleaned_queries.append(fragment)

    formatted_queries = []
    for query in cleaned_queries:
        if not _test_START_STOP_format(query):
            formatted_queries.append(query)
            continue
        # Remove leading 't' before timestamps
        query = re.sub(r"(?<=START)t|(?<=STOP)t", "", query)
        # Split on the keywords (kept in the result via the capture group)
        # to isolate the timestamps; empty pieces are discarded.
        query_parts = [part.strip() for part in re.split(r"(START|STOP)", query) if part]
        if len(query_parts) == 5:
            formatted_queries.append(_convert_timestamps_to_milliseconds(query_parts))
        else:
            # NOTE(review): `logger` is not defined in the visible part of
            # this module; confirm it exists at module level.
            logger.info("Omitting query due to bad format for START STOP qualifier timestamp")

    return formatted_queries


def translate_pattern(pattern: Pattern, data_model_mapping, result_limit, timerange=None):
translated_where_statements = AqlQueryStringPatternTranslator(pattern, data_model_mapping, result_limit)
select_statement = translated_where_statements.dmm.map_selections()
queries = []
for where_statement in translated_where_statements.queries:
has_start_stop = _test_for_start_stop(where_statement)
has_start_stop = _test_START_STOP_format(where_statement)
if(has_start_stop):
queries.append("SELECT {} FROM events WHERE {}".format(select_statement, where_statement))
else:
Expand Down
21 changes: 14 additions & 7 deletions stix_shifter/src/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def transform(obj):
""" abstract function for converting value formats """
raise NotImplementedError


class StringToBool(ValueTransformer):
"""A value transformer for converting String to boolean value"""

Expand All @@ -28,6 +29,7 @@ class SplunkToTimestamp(ValueTransformer):
def transform(splunkTime):
return splunkTime[:-6]+'Z'


class EpochToTimestamp(ValueTransformer):
"""A value transformer for the timestamps"""

Expand All @@ -37,16 +39,18 @@ def transform(epoch):
.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z')


class TimestampToEpoch(ValueTransformer):
"""A value transformer for converting a UTC timestamp (YYYY-MM-DDThh:mm:ss.000Z) to epoch"""
class TimestampToMilliseconds(ValueTransformer):
"""
A value transformer for converting a UTC timestamp (YYYY-MM-DDThh:mm:ss.000Z)
to 13-digit Unix time (epoch + milliseconds)
"""

@staticmethod
def transform(timestamp):
    """Convert a UTC timestamp string to milliseconds since the Unix epoch.

    :param timestamp: string formatted as ``YYYY-MM-DDThh:mm:ss.fZ`` where
        the fraction has one to six digits
    :return: whole milliseconds elapsed since 1970-01-01T00:00:00 as an int;
        digits beyond millisecond precision are dropped (floor)
    :raises ValueError: if ``timestamp`` does not match the format
    """
    time_pattern = '%Y-%m-%dT%H:%M:%S.%fZ'
    epoch = datetime(1970, 1, 1)
    elapsed = datetime.strptime(timestamp, time_pattern) - epoch
    # Work on the integer timedelta fields: int(total_seconds() * 1000)
    # goes through a float and can come out one millisecond low
    # (e.g. 2004-01-10T13:37:04.001Z).
    return (elapsed.days * 86400 + elapsed.seconds) * 1000 + elapsed.microseconds // 1000


class ToInteger(ValueTransformer):
Expand Down Expand Up @@ -117,6 +121,7 @@ def transform(obj):
except ValueError:
print("Cannot convert input to file name")


class ToDomainName(ValueTransformer):
"""A value transformer for expected domain name"""

Expand All @@ -129,6 +134,8 @@ def transform(url):
except ValueError:
print("Cannot convert input to file name")


def get_all_transformers():
return {"SplunkToTimestamp": SplunkToTimestamp, "EpochToTimestamp": EpochToTimestamp, "ToInteger": ToInteger, "ToString": ToString, "ToLowercaseArray": ToLowercaseArray,
"ToBase64": ToBase64, "ToFilePath": ToFilePath, "ToFileName": ToFileName, "StringToBool": StringToBool, "ToDomainName": ToDomainName}
return {"SplunkToTimestamp": SplunkToTimestamp, "EpochToTimestamp": EpochToTimestamp, "ToInteger": ToInteger, "ToString": ToString,
"ToLowercaseArray": ToLowercaseArray, "ToBase64": ToBase64, "ToFilePath": ToFilePath, "ToFileName": ToFileName,
"StringToBool": StringToBool, "ToDomainName": ToDomainName, "TimestampToMilliseconds": TimestampToMilliseconds}
56 changes: 47 additions & 9 deletions tests/qradar_stix_to_aql/test_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,10 @@ def test_network_traffic_protocols(self):
assert query == {'queries': [selections + from_statement + where_statement], 'parsed_stix': parsed_stix}

def test_network_traffic_start_stop(self):
stix_pattern = "[network-traffic:'start' = '2018-06-14T08:36:24.000Z' or network-traffic:end = '2018-06-14T08:36:24.000Z']"
stix_pattern = "[network-traffic:'start' = '2018-06-14T08:36:24.000Z' or network-traffic:end = '2018-06-14T08:36:24.567Z']"
query = shifter.translate('qradar', 'query', '{}', stix_pattern)
where_statement = "WHERE endtime = '1528965384' OR starttime = '1528965384' {} {}".format(default_limit, default_time)
parsed_stix = [{'attribute': 'network-traffic:end', 'comparison_operator': '=', 'value': '2018-06-14T08:36:24.000Z'}, {'attribute': 'network-traffic:start', 'comparison_operator': '=', 'value': '2018-06-14T08:36:24.000Z'}]
where_statement = "WHERE endtime = '1528965384567' OR starttime = '1528965384000' {} {}".format(default_limit, default_time)
parsed_stix = [{'attribute': 'network-traffic:end', 'comparison_operator': '=', 'value': '2018-06-14T08:36:24.567Z'}, {'attribute': 'network-traffic:start', 'comparison_operator': '=', 'value': '2018-06-14T08:36:24.000Z'}]
assert query == {'queries': [selections + from_statement + where_statement], 'parsed_stix': parsed_stix}

def test_artifact_queries(self):
Expand All @@ -143,29 +143,67 @@ def test_artifact_queries(self):
assert query == {'queries': [selections + from_statement + where_statement], 'parsed_stix': parsed_stix}

def test_start_stop_qualifiers_with_two_observations(self):
stix_pattern = "[network-traffic:src_port = 37020 AND user-account:user_id = 'root'] START t'2016-06-01T01:30:00.123Z' STOP t'2016-06-01T02:20:00.123Z' OR [ipv4-addr:value = '192.168.122.83'] START t'2016-06-01T03:55:00.123Z' STOP t'2016-06-01T04:30:00.123Z'"
start_time_01 = "t'2016-06-01T01:30:00.123Z'"
stop_time_01 = "t'2016-06-01T02:20:00.123Z'"
start_time_02 = "t'2016-06-01T03:55:00.123Z'"
stop_time_02 = "t'2016-06-01T04:30:24.743Z'"
epoch_start_time_01 = 1464744600123
epoch_stop_time_01 = 1464747600123
epoch_start_time_02 = 1464753300123
epoch_stop_time_02 = 1464755424743
stix_pattern = "[network-traffic:src_port = 37020 AND user-account:user_id = 'root'] START {} STOP {} OR [ipv4-addr:value = '192.168.122.83'] START {} STOP {}".format(start_time_01, stop_time_01, start_time_02, stop_time_02)
query = shifter.translate('qradar', 'query', '{}', stix_pattern)
where_statement_01 = "WHERE username = 'root' AND sourceport = '37020' {} START'2016-06-01 01:30:00.123Z'STOP'2016-06-01 02:20:00.123Z'".format(default_limit)
where_statement_02 = "WHERE (sourceip = '192.168.122.83' OR destinationip = '192.168.122.83' OR identityip = '192.168.122.83') {} START'2016-06-01 03:55:00.123Z'STOP'2016-06-01 04:30:00.123Z'".format(default_limit)
where_statement_01 = "WHERE username = 'root' AND sourceport = '37020' {} START {} STOP {}".format(default_limit, epoch_start_time_01, epoch_stop_time_01)
where_statement_02 = "WHERE (sourceip = '192.168.122.83' OR destinationip = '192.168.122.83' OR identityip = '192.168.122.83') {} START {} STOP {}".format(default_limit, epoch_start_time_02, epoch_stop_time_02)
parsed_stix = [{'attribute': 'user-account:user_id', 'comparison_operator': '=', 'value': 'root'},
{'attribute': 'network-traffic:src_port', 'comparison_operator': '=', 'value': 37020},
{'attribute': 'ipv4-addr:value', 'comparison_operator': '=', 'value': '192.168.122.83'}]
assert len(query['queries']) == 2
assert query == {'queries': [selections + from_statement + where_statement_01, selections + from_statement + where_statement_02], 'parsed_stix': parsed_stix}

def test_start_stop_qualifiers_with_three_observations(self):
stix_pattern = "[network-traffic:src_port = 37020 AND network-traffic:dst_port = 635] START t'2016-06-01T00:00:00.123Z' STOP t'2016-06-01T01:11:11.456Z' OR [url:value = 'www.example.com'] OR [ipv4-addr:value = '333.333.333.0'] START t'2016-06-07T02:22:22.789Z' STOP t'2016-06-07T03:33:33.012Z'"
start_time_01 = "t'2016-06-01T00:00:00.123Z'"
stop_time_01 = "t'2016-06-01T01:11:11.456Z'"
start_time_02 = "t'2016-06-07T02:22:22.789Z'"
stop_time_02 = "t'2016-06-07T03:33:33.012Z'"
epoch_start_time_01 = 1464739200123
epoch_stop_time_01 = 1464743471456
epoch_start_time_02 = 1465266142789
epoch_stop_time_02 = 1465270413012
stix_pattern = "[network-traffic:src_port = 37020 AND network-traffic:dst_port = 635] START {} STOP {} OR [url:value = 'www.example.com'] OR [ipv4-addr:value = '333.333.333.0'] START {} STOP {}".format(
start_time_01, stop_time_01, start_time_02, stop_time_02)
query = shifter.translate('qradar', 'query', '{}', stix_pattern)
where_statement_01 = "WHERE destinationport = '635' AND sourceport = '37020' {} START'2016-06-01 00:00:00.123Z'STOP'2016-06-01 01:11:11.456Z'".format(default_limit)
where_statement_01 = "WHERE destinationport = '635' AND sourceport = '37020' {} START {} STOP {}".format(default_limit, epoch_start_time_01, epoch_stop_time_01)
where_statement_02 = "WHERE url = 'www.example.com' {} {}".format(default_limit, default_time)
where_statement_03 = "WHERE (sourceip = '333.333.333.0' OR destinationip = '333.333.333.0' OR identityip = '333.333.333.0') {} START'2016-06-07 02:22:22.789Z'STOP'2016-06-07 03:33:33.012Z'".format(default_limit)
where_statement_03 = "WHERE (sourceip = '333.333.333.0' OR destinationip = '333.333.333.0' OR identityip = '333.333.333.0') {} START {} STOP {}".format(default_limit, epoch_start_time_02, epoch_stop_time_02)
parsed_stix = [{'attribute': 'network-traffic:dst_port', 'comparison_operator': '=', 'value': 635},
{'attribute': 'network-traffic:src_port', 'comparison_operator': '=', 'value': 37020},
{'attribute': 'url:value', 'comparison_operator': '=', 'value': 'www.example.com'},
{'attribute': 'ipv4-addr:value', 'comparison_operator': '=', 'value': '333.333.333.0'}]
assert len(query['queries']) == 3
assert query == {'queries': [selections + from_statement + where_statement_01, selections + from_statement + where_statement_02, selections + from_statement + where_statement_03], 'parsed_stix': parsed_stix}

def test_start_stop_qualifiers_with_missing_or_partial_milliseconds(self):
    # Timestamps whose fraction is absent or not exactly three digits are
    # expected to translate to millisecond values ending in 000.
    # first observation: milliseconds missing entirely
    first_window = ("t'2016-06-01T01:30:00Z'", "t'2016-06-01T02:20:00Z'")
    # second observation: one-digit start fraction, four-digit stop fraction
    second_window = ("t'2016-06-01T03:55:00.1Z'", "t'2016-06-01T04:30:24.1243Z'")
    first_expected = (1464744600000, 1464747600000)
    second_expected = (1464753300000, 1464755424000)

    stix_pattern = "[user-account:user_id = 'root'] START {} STOP {} OR [ipv4-addr:value = '192.168.122.83'] START {} STOP {}".format(
        first_window[0], first_window[1], second_window[0], second_window[1])
    query = shifter.translate('qradar', 'query', '{}', stix_pattern)

    where_statements = [
        "WHERE username = 'root' {} START {} STOP {}".format(
            default_limit, first_expected[0], first_expected[1]),
        "WHERE (sourceip = '192.168.122.83' OR destinationip = '192.168.122.83' OR identityip = '192.168.122.83') {} START {} STOP {}".format(
            default_limit, second_expected[0], second_expected[1]),
    ]
    expected_queries = [selections + from_statement + where for where in where_statements]
    parsed_stix = [
        {'attribute': 'user-account:user_id', 'comparison_operator': '=', 'value': 'root'},
        {'attribute': 'ipv4-addr:value', 'comparison_operator': '=', 'value': '192.168.122.83'},
    ]

    assert len(query['queries']) == 2
    assert query == {'queries': expected_queries, 'parsed_stix': parsed_stix}

def test_set_operators(self):
stix_pattern = "[ipv4-addr:value ISSUBSET '198.51.100.0/24']"
query = shifter.translate('qradar', 'query', '{}', stix_pattern)
Expand Down

0 comments on commit 21f742a

Please sign in to comment.