From bdfa9e564355894284c7a34e717c2c5ca400c2e8 Mon Sep 17 00:00:00 2001
From: Stephen Slogar
Date: Wed, 11 Dec 2024 08:26:49 -0500
Subject: [PATCH] Python: Add log group argument to CloudWatchQuery. Addresses
 #7022 (#7066)

---
 python/example_code/cloudwatch-logs/README.md |  6 +++---
 .../scenarios/large-query/README.md           |  1 +
 .../scenarios/large-query/cloudwatch_query.py | 24 +++++++++++--------
 .../scenarios/large-query/exec.py             | 24 +++++++++++++++----
 4 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/python/example_code/cloudwatch-logs/README.md b/python/example_code/cloudwatch-logs/README.md
index 92988703db5..a7da60c829c 100644
--- a/python/example_code/cloudwatch-logs/README.md
+++ b/python/example_code/cloudwatch-logs/README.md
@@ -38,8 +38,8 @@ python -m pip install -r requirements.txt
 
 Code excerpts that show you how to call individual service functions.
 
-- [GetQueryResults](scenarios/large-query/cloudwatch_query.py#L200)
-- [StartQuery](scenarios/large-query/cloudwatch_query.py#L126)
+- [GetQueryResults](scenarios/large-query/cloudwatch_query.py#L204)
+- [StartQuery](scenarios/large-query/cloudwatch_query.py#L130)
 
 ### Scenarios
 
@@ -106,4 +106,4 @@ in the `python` folder.
 
 Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 
-SPDX-License-Identifier: Apache-2.0
\ No newline at end of file
+SPDX-License-Identifier: Apache-2.0
diff --git a/python/example_code/cloudwatch-logs/scenarios/large-query/README.md b/python/example_code/cloudwatch-logs/scenarios/large-query/README.md
index fd53fafc27c..9adfd42679a 100644
--- a/python/example_code/cloudwatch-logs/scenarios/large-query/README.md
+++ b/python/example_code/cloudwatch-logs/scenarios/large-query/README.md
@@ -33,6 +33,7 @@ Use the following steps to create the necessary resources in AWS CloudFormation
 1. Run `aws cloudformation deploy --template-file stack.yaml --stack-name CloudWatchLargeQuery`
 1. Run `./make-log-files.sh`. This will output two timestamps for use in the following step.
 1. Run `export QUERY_START_DATE=<QUERY_START_DATE>`. Replace `<QUERY_START_DATE>` with the output from the previous step. Repeat this for `QUERY_END_DATE`.
+1. Optional: Run `export QUERY_LOG_GROUP=<QUERY_LOG_GROUP>`. Replace `<QUERY_LOG_GROUP>` with your preferred log group.
 1. Run `./put-log-events.sh`.
 1. Wait five minutes for logs to settle and to make sure you're not querying for logs that exist in the future.
 
diff --git a/python/example_code/cloudwatch-logs/scenarios/large-query/cloudwatch_query.py b/python/example_code/cloudwatch-logs/scenarios/large-query/cloudwatch_query.py
index a356f236ba3..a59a9825117 100644
--- a/python/example_code/cloudwatch-logs/scenarios/large-query/cloudwatch_query.py
+++ b/python/example_code/cloudwatch-logs/scenarios/large-query/cloudwatch_query.py
@@ -8,6 +8,8 @@
 from date_utilities import DateUtilities
 
+DEFAULT_QUERY = "fields @timestamp, @message | sort @timestamp asc"
+DEFAULT_LOG_GROUP = "/workflows/cloudwatch-logs/large-query"
 
 
 class DateOutOfBoundsError(Exception):
     """Exception raised when the date range for a query is out of bounds."""
@@ -19,17 +21,18 @@ class CloudWatchQuery:
     """
     A class to query AWS CloudWatch logs within a specified date range.
 
-    :ivar date_range: Start and end datetime for the query.
     :vartype date_range: tuple
     :ivar limit: Maximum number of log entries to return.
     :vartype limit: int
+    :ivar log_group: Name of the log group to query.
+    :ivar query_string: CloudWatch Logs Insights query string to run.
     """
 
-    def __init__(self, date_range):
+    def __init__(self, log_group: str = DEFAULT_LOG_GROUP, query_string: str = DEFAULT_QUERY) -> None:
         self.lock = threading.Lock()
-        self.log_groups = "/workflows/cloudwatch-logs/large-query"
+        self.log_group = log_group
+        self.query_string = query_string
         self.query_results = []
-        self.date_range = date_range
         self.query_duration = None
         self.datetime_format = "%Y-%m-%d %H:%M:%S.%f"
         self.date_utilities = DateUtilities()
@@ -50,8 +53,9 @@ def query_logs(self, date_range):
 
         logging.info(
             f"Original query:"
-            f"\n       START:      {start_date}"
-            f"\n       END:        {end_date}"
+            f"\n       START:     {start_date}"
+            f"\n       END:       {end_date}"
+            f"\n       LOG GROUP: {self.log_group}"
         )
         self.recursive_query((start_date, end_date))
         end_time = datetime.now()
@@ -143,10 +147,10 @@ def perform_query(self, date_range):
             self.date_utilities.convert_iso8601_to_unix_timestamp(date_range[1])
         )
         response = client.start_query(
-            logGroupName=self.log_groups,
+            logGroupName=self.log_group,
             startTime=start_time,
             endTime=end_time,
-            queryString="fields @timestamp, @message | sort @timestamp asc",
+            queryString=self.query_string,
             limit=self.limit,
         )
         query_id = response["queryId"]
@@ -185,10 +189,10 @@ def _initiate_query(self, client, date_range, max_logs):
             self.date_utilities.convert_iso8601_to_unix_timestamp(date_range[1])
         )
         response = client.start_query(
-            logGroupName=self.log_groups,
+            logGroupName=self.log_group,
             startTime=start_time,
             endTime=end_time,
-            queryString="fields @timestamp, @message | sort @timestamp asc",
+            queryString=self.query_string,
             limit=max_logs,
         )
         return response["queryId"]
diff --git a/python/example_code/cloudwatch-logs/scenarios/large-query/exec.py b/python/example_code/cloudwatch-logs/scenarios/large-query/exec.py
index 402e6346563..db641fea3e4 100644
--- a/python/example_code/cloudwatch-logs/scenarios/large-query/exec.py
+++ b/python/example_code/cloudwatch-logs/scenarios/large-query/exec.py
@@ -16,6 +16,8 @@
     format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s",
 )
 
+DEFAULT_QUERY_LOG_GROUP = "/workflows/cloudwatch-logs/large-query"
+
 
 class CloudWatchLogsQueryRunner:
     def __init__(self):
@@ -42,8 +44,10 @@ def create_cloudwatch_logs_client(self):
     def fetch_environment_variables(self):
         """
         Fetches and validates required environment variables for query start and end dates.
+        Fetches the environment variable for the log group, returning the default value if it
+        does not exist.
 
-        :return: Tuple of query start date and end date as integers.
+        :return: Tuple of query start date and end date as integers and the log group.
         :rtype: tuple
         :raises SystemExit: If required environment variables are missing or invalid.
""" @@ -58,8 +62,14 @@ def fetch_environment_variables(self): except ValueError as e: logging.error(f"Error parsing date environment variables: {e}") sys.exit(1) + + try: + log_group = os.environ["QUERY_LOG_GROUP"] + except KeyError: + logging.warning("No QUERY_LOG_GROUP environment variable, using default value") + log_group = DEFAULT_QUERY_LOG_GROUP - return query_start_date, query_end_date + return query_start_date, query_end_date, log_group def convert_dates_to_iso8601(self, start_date, end_date): """ @@ -85,6 +95,7 @@ def execute_query( start_date_iso8601, end_date_iso8601, log_group="/workflows/cloudwatch-logs/large-query", + query="fields @timestamp, @message | sort @timestamp asc" ): """ Creates a CloudWatchQuery instance and executes the query with provided date range. @@ -95,9 +106,12 @@ def execute_query( :type end_date_iso8601: str :param log_group: Log group to search: "/workflows/cloudwatch-logs/large-query" :type log_group: str + :param query: Query string to pass to the CloudWatchQuery instance + :type query: str """ cloudwatch_query = CloudWatchQuery( - [start_date_iso8601, end_date_iso8601], + log_group=log_group, + query_string=query ) cloudwatch_query.query_logs((start_date_iso8601, end_date_iso8601)) logging.info("Query executed successfully.") @@ -113,12 +127,12 @@ def main(): """ logging.info("Starting a recursive CloudWatch logs query...") runner = CloudWatchLogsQueryRunner() - query_start_date, query_end_date = runner.fetch_environment_variables() + query_start_date, query_end_date, log_group = runner.fetch_environment_variables() start_date_iso8601 = DateUtilities.convert_unix_timestamp_to_iso8601( query_start_date ) end_date_iso8601 = DateUtilities.convert_unix_timestamp_to_iso8601(query_end_date) - runner.execute_query(start_date_iso8601, end_date_iso8601) + runner.execute_query(start_date_iso8601, end_date_iso8601, log_group=log_group) if __name__ == "__main__":