Skip to content

Commit

Permalink
Fixes for issues relating to validation changes
Browse files Browse the repository at this point in the history
  • Loading branch information
Onager committed Jul 16, 2024
1 parent 3d3dbed commit 740a900
Show file tree
Hide file tree
Showing 13 changed files with 72 additions and 27 deletions.
6 changes: 4 additions & 2 deletions data/recipes/gcp_logging_cloudaudit_ts.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@
"name": "GCPLogsCollector",
"args": {
"project_name": "@project_name",
"filter_expression": "logName=projects/@project_name/logs/cloudaudit.googleapis.com%2Factivity timestamp>\"@start_date\" timestamp<\"@end_date\"",
"filter_expression": "logName=projects/@project_name/logs/cloudaudit.googleapis.com%2Factivity timestamp>\"<START_TIME>\" timestamp<\"<END_TIME>\"",
"backoff": "@backoff",
"delay": "@delay"
"delay": "@delay",
"start_time": "@start_date",
"end_time": "@end_date"
}
}, {
"wants": ["GCPLogsCollector"],
Expand Down
4 changes: 3 additions & 1 deletion data/recipes/gcp_logging_cloudsql_ts.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
"project_name": "@project_name",
"filter_expression": "logName:\"projects/@project_name/logs/cloudsql.googleapis.com\" timestamp>\"@start_date\" timestamp<\"@end_date\"",
"backoff": "@backoff",
"delay": "@delay"
"delay": "@delay",
"start_time": "@start_date",
"end_time": "@end_date"
}
},
{
Expand Down
4 changes: 3 additions & 1 deletion data/recipes/gcp_logging_collect.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
"project_name": "@project_name",
"filter_expression": "@filter_expression",
"backoff": "@backoff",
"delay": "@delay"
"delay": "@delay",
"start_time": null,
"end_time": null
}
}],
"args": [
Expand Down
4 changes: 3 additions & 1 deletion data/recipes/gcp_logging_gce_instance_ts.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
"project_name": "@project_name",
"filter_expression": "logName=projects/@project_name/logs/cloudaudit.googleapis.com%2Factivity operation.producer=\"compute.googleapis.com\" resource.labels.instance_id=\"@instance_id\"",
"backoff": "@backoff",
"delay": "@delay"
"delay": "@delay",
"start_time": null,
"end_time": null
}
}, {
"wants": ["GCPLogsCollector"],
Expand Down
6 changes: 4 additions & 2 deletions data/recipes/gcp_logging_gce_ts.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@
"name": "GCPLogsCollector",
"args": {
"project_name": "@project_name",
"filter_expression": "logName=projects/@project_name/logs/cloudaudit.googleapis.com%2Factivity resource.type:\"gce\" timestamp>=\"@start_date\" timestamp<=\"@end_date\"",
"filter_expression": "logName=projects/@project_name/logs/cloudaudit.googleapis.com%2Factivity resource.type:\"gce\" timestamp>=\"<START_TIME>\" timestamp<=\"<END_TIME>\"",
"backoff": "@backoff",
"delay": "@delay"
"delay": "@delay",
"start_time": "@start_date",
"end_time": "@end_date"
}
}, {
"wants": ["GCPLogsCollector"],
Expand Down
4 changes: 3 additions & 1 deletion data/recipes/gcp_logging_ts.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
"project_name": "@project_name",
"filter_expression": "@filter_expression",
"backoff": "@backoff",
"delay": "@delay"
"delay": "@delay",
"start_time": null,
"end_time": null
}
},
{
Expand Down
53 changes: 42 additions & 11 deletions dftimewolf/lib/collectors/gcp_logging.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# -*- coding: utf-8 -*-
"""Reads logs from a GCP cloud project."""
import datetime
import json
import tempfile
import time
from typing import Optional, Dict, Any, Tuple
from typing import Any, Dict, Optional, Tuple

from google.api_core import exceptions as google_api_exceptions
from google.auth import exceptions as google_auth_exceptions
Expand All @@ -16,6 +17,7 @@
from dftimewolf.lib.modules import manager as modules_manager
from dftimewolf.lib.state import DFTimewolfState


# Monkey patching the ProtobufEntry because of various issues, notably
# https://github.com/googleapis/google-cloud-python/issues/7918

Expand All @@ -42,6 +44,8 @@ def __init__(self,
self._project_name = ''
self._backoff = False
self._delay = 0
self.start_time = None
self.end_time = None

def OutputFile(self) -> Tuple[Any, str]:
"""Generate an output file name and path"""
Expand All @@ -54,9 +58,6 @@ def OutputFile(self) -> Tuple[Any, str]:
def SetupLoggingClient(self) -> Any:
"""Sets up a GCP Logging Client
Args:
N/A
Returns:
logging.Client: A GCP logging client
"""
Expand Down Expand Up @@ -142,23 +143,53 @@ def ProcessPages(self, pages: Any, backoff_multiplier: int,
return output_path

# pylint: disable=arguments-differ
def SetUp(self, project_name: str, filter_expression: str, backoff: bool,
delay: str) -> None:
def SetUp(
self,
project_name: str,
filter_expression: str,
backoff: bool,
delay: str,
start_time: datetime.datetime,
end_time: datetime.datetime,
) -> None:
"""Sets up a GCP logs collector.
Args:
project_name (str): name of the project to fetch logs from.
filter_expression (str): GCP advanced logs filter expression.
backoff (bool): Retry queries with an increased delay when API \
quotas are exceeded.
delay (str): Seconds to wait between retreiving results pages to \
avoid exceeding API quotas
backoff (bool): Retry queries with an increased delay when API quotas are
exceeded.
delay (str): Seconds to wait between retrieving results pages to avoid
exceeding API quotas
start_time: start time of the query. This will be used to replace
<START_TIME> in the queries.
end_time: end time of the query. This will be used to replace <END_TIME>
in the queries.
"""
self._project_name = project_name
self._filter_expression = filter_expression
self._backoff = backoff
self._delay = int(delay)

self.start_time = start_time
self.end_time = end_time

if start_time and end_time and start_time > end_time:
self.ModuleError(
f'Start date "{start_time}" must be before "{end_time}"',
critical=True,
)

if self.start_time:
filter_expression = filter_expression.replace(
'<START_TIME>', self.start_time.strftime('%Y%m%dT%H%M%S%z')
)
if self.end_time:
filter_expression = filter_expression.replace(
'<END_TIME>', self.end_time.strftime('%Y%m%dT%H%M%S%z')
)

self._filter_expression = filter_expression

def Process(self) -> None:
"""Copies logs from a cloud project."""

Expand Down
2 changes: 1 addition & 1 deletion dftimewolf/lib/validators/aws_region.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,6 @@ def Validate(self,
self.NAME,
'Invalid AWS Region name'))

return argument_value
return str(argument_value)

validators_manager.ValidatorsManager.RegisterValidator(AWSRegionValidator)
2 changes: 1 addition & 1 deletion dftimewolf/lib/validators/azure_region.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,6 @@ def Validate(self,
self.NAME,
'Invalid Azure Region name'))

return argument_value
return str(argument_value)

validators_manager.ValidatorsManager.RegisterValidator(AzureRegionValidator)
2 changes: 1 addition & 1 deletion dftimewolf/lib/validators/datetime_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def _EnsureTimezone(
Returns:
A datetime with timezone information.
"""
# If there's no timezone information, we assume it's UTC.
# If there's no timezone information, we assume it is UTC.
if (parsed_datetime.tzinfo is None or
parsed_datetime.tzinfo.utcoffset(parsed_datetime) is None):
parsed_datetime = parsed_datetime.replace(tzinfo=datetime.UTC)
Expand Down
2 changes: 1 addition & 1 deletion dftimewolf/lib/validators/gcp_zone.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,5 +70,5 @@ def Validate(self,
self.NAME,
'Invalid GCP Zone name')

return argument_value
return str(argument_value)
validators_manager.ValidatorsManager.RegisterValidator(GCPZoneValidator)
2 changes: 1 addition & 1 deletion dftimewolf/lib/validators/subnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,6 @@ def ValidateSingle(self,
self.NAME,
'Not a valid subnet')

return argument_value
return str(argument_value)

validators_manager.ValidatorsManager.RegisterValidator(SubnetValidator)
8 changes: 5 additions & 3 deletions tests/lib/validators/datetime_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def testValidateSuccessWithOrder(self):
first_string = '2023-01-01 00:00:00'
second_string = '2023-01-02 00:00:00'
third_string = '2023-01-03 00:00:00'
third_datetime = datetime.datetime(2023, 1, 3, 0, 0, 0)
third_datetime = datetime.datetime(2023, 1, 3, 0, 0, 0, tzinfo=datetime.UTC)
fourth_string = '2023-01-04 00:00:00'
fifth_string = '2023-01-05 00:00:00'

Expand Down Expand Up @@ -91,11 +91,13 @@ def testValidate(self):
"""Tests the validate method."""
timeless_string = '20240101'
val = self.validator.Validate(timeless_string, self.recipe_argument)
self.assertEqual(val, datetime.datetime(2024, 1, 1, 23, 59, 59))
self.assertEqual(val, datetime.datetime(
2024, 1, 1, 23, 59, 59, tzinfo=datetime.UTC))

string_with_time = '2024-01-01 09:13:00'
val = self.validator.Validate(string_with_time, self.recipe_argument)
self.assertEqual(val, datetime.datetime(2024, 1, 1, 9, 13, 0))
self.assertEqual(val, datetime.datetime(
2024, 1, 1, 9, 13, 0, tzinfo=datetime.UTC))

if __name__ == '__main__':
unittest.main()

0 comments on commit 740a900

Please sign in to comment.