diff --git a/.github/workflows/git-secrets.yml b/.github/workflows/git-secrets.yml deleted file mode 100644 index a3dce06..0000000 --- a/.github/workflows/git-secrets.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: git-secrets - -# Controls when the workflow will run -# Triggers the workflow on push or pull request events but only for the main branch -on: [push] - -# A workflow run is made up of one or more jobs that can run sequentially or in parallel -jobs: - # This workflow contains a single job called "main" - git-secrets: - # The type of runner that the job will run on - runs-on: ubuntu-18.04 - - # Steps represent a sequence of tasks that will be executed as part of the job - steps: - - name: Check Out Source Code - uses: actions/checkout@v2 - - - name: Set up Python 3.8 - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - name: Installing dependencies - run: - sudo apt-get install git less openssh-server - - name: Installing scanning tool - run: | - brew install git-secrets - git secrets --install - git secrets --register-aws - - name: Running scanning tool - run: - git secrets --scan diff --git a/README.md b/README.md index cd975d7..1ee6921 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ You can start the dashboard metrics extractor either by running it as a python s Run the following command in the terminal: ```bash -$ curl -L -O https://github.com/logzio/dashboard-metrics-extractor/releases/download/V0.0.6/extract \ +$ curl -L -O https://github.com/logzio/dashboard-metrics-extractor/releases/download/V0.1.0/extract \ && sudo chmod 755 extract \ && ./extract ``` @@ -53,9 +53,11 @@ To specify the endpoints via a config file: To specify the endpoints manually: 1. Press Enter. -2. Type in the Prometheus endpoint address, for example `http://127.0.0.1:7000`, and press Enter. -3. Type in the Grafana endpoint address, for example `http://127.0.0.1:8000`, and press Enter. -4. Type in your Grafana API token and press Enter. +2. 
Type in the Grafana endpoint address, for example `http://127.0.0.1:8000`, and press Enter. +3. Type in your Grafana API token and press Enter. +4. Type in the Prometheus endpoint address, for example `http://127.0.0.1:7000`, and press Enter. +5. Type in the Prometheus timeseries count interval (only minute-based values are supported), for example `10m`, and press Enter, or just press Enter to use the default (`5m`). + #### Extract metrics from the Logz.io endpoint @@ -79,6 +81,7 @@ To specify the input via an API token: ### Example config: prometheus: endpoint: http://127.0.0.1:7000 + timeseries_count_interval: 5m # Optional. Only minute-based values are supported. Defaults to 5m. grafana: endpoint: http://127.0.0.1:8000 diff --git a/metrics_dashboard_extractor.py b/metrics_dashboard_extractor.py index 3ea904d..b83ec09 100644 --- a/metrics_dashboard_extractor.py +++ b/metrics_dashboard_extractor.py @@ -236,14 +236,17 @@ def get_total_metrics_count(config): if grafana_config.get('endpoint') is not None: try: base_url = grafana_config.get('endpoint') + logger.info(f"Grafana endpoint base url: {base_url}") headers = {'Authorization': 'Bearer ' + grafana_config['token'], 'Content-Type': 'application/json', 'Accept': 'application/json'} + search_url=base_url + '/api/search' + logger.info(f"Grafana endpoint search url: {search_url}") response = requests.get( - base_url + '/api/search' + search_url , headers=headers) if response.status_code != 200: logger.error( - "Received status code: {} , cannot complete dashboards fetch".format(response.status_code)) + "Received status code: {}, cannot complete dashboards fetch".format(response.status_code)) return return _extract_dashboards_metrics(base_url, headers, response) except requests.HTTPError: diff --git a/settings_reader.py b/settings_reader.py index 6bea3e8..d186a09 100644 --- a/settings_reader.py +++ b/settings_reader.py @@ -1,6 +1,4 @@ import logging -import os - import yaml logger = logging.getLogger() @@ -24,5 +22,8 @@ def get_config() -> dict: 
grafana_endpoint = input('No config file found, please enter grafana endpoint: ') grafana_api_token = input('Please enter grafana api token: ') prometheus_endpoint = input('Please enter prometheus endpoint: ') + prometheus_used_timeseries_interval = input( + 'Please enter prometheus timeseries count interval in minutes (i.e 1m, 2m) or enter to use the default of 5m (recommended): ') return {'grafana': {'endpoint': grafana_endpoint, 'token': grafana_api_token}, - 'prometheus': {'endpoint': prometheus_endpoint}} + 'prometheus': {'endpoint': prometheus_endpoint, + 'timeseries_count_interval': prometheus_used_timeseries_interval}} diff --git a/timeseries_extractor.py b/timeseries_extractor.py index 244dcba..060169b 100644 --- a/timeseries_extractor.py +++ b/timeseries_extractor.py @@ -1,62 +1,99 @@ import json import logging +import re import requests logger = logging.getLogger() PROMETHEUS_API_QUERY_PREFIX = '/api/v1/query?query=' -PROMETHEUS_TOTAL_TIMESERIES_COUNT_METRIC = 'prometheus_tsdb_head_series' +PROMETHEUS_TOTAL_TIMESERIES_COUNT_METRIC = 'prometheus_tsdb_head_series[' PROMETHEUS_LAST_OVER_TIME_QUERY_PREFIX = 'last_over_time(' PROMETHEUS_COUNT_FUNCTION_PREFIX = 'count(' CLOSING_PERENTHESIS = ')' -PROMETHEUS_FIVE_MINUTES_INTERVAL_TIME_FUNCTION_SUFFIX = '[5m])' +PROMETHEUS_INTERVAL_TIME_FUNCTION_SUFFIX = f'])' PROMETHEUS_METRIC_NAME_PREFIX = '{__name__=~"' PROMETHEUS_METRIC_NAME_CLOSING_PERENTHESIS = '"}' +PROMETHEUS_ACTIVE_TIMESERIES_INTERVAL_REGEX = r'^\d+m$' +DEFAULT_TIMESERIES_COUNT_INTERVAL = '5m' -def _count_prometheus_total_timeseries(response): - response_json = json.loads(response.content) - timeseries_count = response_json.get('data').get('result')[0].get('value')[1] - print('Total time series count: {}'.format(timeseries_count)) +def _count_prometheus_total_timeseries(response, total_count_timseries_interval): + if response.status_code == 200: + response_json = json.loads(response.content) + try: + timeseries_count = 
response_json.get('data').get('result')[0].get('value')[1] + logger.info(f'Total time series count: {timeseries_count}') + except Exception: + timeseries_count = 0 + logger.info(f'Total time series in the last {total_count_timseries_interval}: {timeseries_count}') + else: + logger.error(f"Prometheus API returned error: {response.error_code} for total timeseries count query") def get_prometheus_timeseries_count(config: dict, metrics): try: prometheus_config = config['prometheus'] if prometheus_config.get('endpoint'): - _get_total_timeseries_count(prometheus_config.get('endpoint')) - _get_used_timeseries_count(prometheus_config.get('endpoint'), metrics) + timeseries_interval = extract_timeseries_interval(prometheus_config) + _get_total_timeseries_count(prometheus_config.get('endpoint'), timeseries_interval) + if metrics: + _get_used_timeseries_count(prometheus_config.get('endpoint'), + timeseries_interval, metrics) + else: + logger.error( + "An error occurred when fetching distinct metrics from grafana dashboards, skipping count of used timeseries count") else: logger.info("No prometheus endpoint found, skipping timeseries count") except KeyError: logger.error('Invalid config for prometheus server, skipping time series count') -def _get_used_timeseries_count(endpoint, metrics): +def extract_timeseries_interval(prometheus_config): + timeseries_interval = prometheus_config.get('timeseries_count_interval') + if timeseries_interval and not re.match(PROMETHEUS_ACTIVE_TIMESERIES_INTERVAL_REGEX, + timeseries_interval): + logger.info( + f"Timeseries count interval was not entered or invalid, using default of {DEFAULT_TIMESERIES_COUNT_INTERVAL}") + timeseries_interval = DEFAULT_TIMESERIES_COUNT_INTERVAL + return timeseries_interval + + +def _get_used_timeseries_count(endpoint, used_timeseries_interval, metrics): query_url = endpoint + PROMETHEUS_API_QUERY_PREFIX + PROMETHEUS_COUNT_FUNCTION_PREFIX + PROMETHEUS_LAST_OVER_TIME_QUERY_PREFIX + PROMETHEUS_METRIC_NAME_PREFIX 
metrics_regex = '' for i, metric in enumerate(metrics): metrics_regex += metric if i < len(metrics) - 1: metrics_regex += '|' - query_url += metrics_regex + PROMETHEUS_METRIC_NAME_CLOSING_PERENTHESIS + PROMETHEUS_FIVE_MINUTES_INTERVAL_TIME_FUNCTION_SUFFIX + CLOSING_PERENTHESIS + query_url += metrics_regex + PROMETHEUS_METRIC_NAME_CLOSING_PERENTHESIS + f'[{used_timeseries_interval}])' + CLOSING_PERENTHESIS + logger.info(f"Prometheus used timeseries count query url: {query_url}") response = requests.get(query_url) - response_json = json.loads(response.content) - used_timeseries_count = response_json.get('data').get('result')[0].get('value')[1] - print(f'Total used time series in the last 5m: {used_timeseries_count}') + if response.status_code == 200: + response_json = json.loads(response.content) + try: + used_timeseries_count = response_json.get('data').get('result')[0].get('value')[1] + except Exception: + used_timeseries_count = 0 + logger.info(f'Total used time series in the last {used_timeseries_interval}: {used_timeseries_count}') + else: + logger.error( + "Recieved status code: {} from prometheus, cannot complete the used time series request".format( + response.status_code)) -def _get_total_timeseries_count(endpoint): +def _get_total_timeseries_count(endpoint, total_count_timeseries_interval): + total_timeseries_count_url = endpoint + PROMETHEUS_API_QUERY_PREFIX + PROMETHEUS_LAST_OVER_TIME_QUERY_PREFIX + PROMETHEUS_TOTAL_TIMESERIES_COUNT_METRIC + total_count_timeseries_interval + PROMETHEUS_INTERVAL_TIME_FUNCTION_SUFFIX + logger.info(f"Prometheus total timeseries count query url: {total_timeseries_count_url}") try: response = requests.get( - endpoint + PROMETHEUS_API_QUERY_PREFIX + PROMETHEUS_LAST_OVER_TIME_QUERY_PREFIX + PROMETHEUS_TOTAL_TIMESERIES_COUNT_METRIC + PROMETHEUS_FIVE_MINUTES_INTERVAL_TIME_FUNCTION_SUFFIX) + total_timeseries_count_url) if (response.status_code != 200): logger.error( "Recieved status code: {} from prometheus, cannot complete the 
total time series request".format( response.status_code)) return - _count_prometheus_total_timeseries(response) + _count_prometheus_total_timeseries(response, total_count_timeseries_interval) except requests.HTTPError: logger.error( "Cannot get a response from prometheus server, please check the config file")