Skip to content

Commit

Permalink
Cache digest text for MySQL
Browse files Browse the repository at this point in the history
  • Loading branch information
sethsamuel committed Dec 23, 2024
1 parent aa9bd4e commit 24120d6
Showing 1 changed file with 51 additions and 3 deletions.
54 changes: 51 additions & 3 deletions mysql/datadog_checks/mysql/statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from typing import Any, Callable, Dict, List, Tuple

import pymysql
from cachetools import TTLCache
from cachetools import TTLCache, Cache

from datadog_checks.base import is_affirmative
from datadog_checks.base.log import get_check_logger
Expand Down Expand Up @@ -91,6 +91,11 @@ def __init__(self, check, config, connection_args):
ttl=60 * 60 / self._config.full_statement_text_samples_per_hour_per_query,
) # type: TTLCache

# digest_text_cache: cache the full digest text for statements to avoid querying the db for the same digest
self._digest_text_cache = Cache(
maxsize=10 * 1000,
) # type: TTLCache

def _get_db_connection(self):
"""
lazy reconnect db
Expand Down Expand Up @@ -166,6 +171,8 @@ def collect_per_statement_metrics(self):
def _collect_per_statement_metrics(self):
    # type: () -> List[PyMysqlRow]
    # Pipeline: raw summary rows -> saturated with digest text -> EXPLAIN
    # statements filtered out -> queries normalized -> per-interval
    # derivative rows computed from the monotonic counters.
    raw_rows = self._query_summary_per_statement()
    saturated = self._add_digest_text(raw_rows)
    filtered = self._filter_query_rows(saturated)
    normalized = self._normalize_queries(filtered)
    return self._state.compute_derivative_rows(normalized, METRICS_COLUMNS, key=_row_key)
Expand All @@ -182,7 +189,6 @@ def _query_summary_per_statement(self):
sql_statement_summary = """\
SELECT `schema_name`,
`digest`,
`digest_text`,
`count_star`,
`sum_timer_wait`,
`sum_lock_time`,
Expand All @@ -195,7 +201,6 @@ def _query_summary_per_statement(self):
`sum_no_index_used`,
`sum_no_good_index_used`
FROM performance_schema.events_statements_summary_by_digest
WHERE `digest_text` NOT LIKE 'EXPLAIN %' OR `digest_text` IS NULL
ORDER BY `count_star` DESC
LIMIT 10000"""

Expand All @@ -206,6 +211,49 @@ def _query_summary_per_statement(self):

return rows

def _add_digest_text(self, rows):
# type: (List[PyMysqlRow]) -> List[PyMysqlRow]
"""
Add the full statement text to the rows
"""
saturated_rows = []
digests = []
# Find digests we don't have cached
for row in rows:
if self._digest_text_cache.get(row['digest']):
continue
digests.append(row['digest'])

if digests:
# Query for uncached digests
sql_statement_text = """\
SELECT `digest`, `digest_text`
FROM performance_schema.events_statements_summary_by_digest
WHERE `digest` IN ({})""".format(
",".join(["%s"] * len(digests))
)

with closing(self._get_db_connection().cursor(CommenterDictCursor)) as cursor:
self._log.warning("Querying for digest text %s %s", sql_statement_text, digests)
cursor.execute(sql_statement_text, digests)
digest_rows = cursor.fetchall() or []
for row in digest_rows:
self._digest_text_cache[row['digest']] = row['digest_text']

for row in rows:
row = dict(copy.copy(row))
row['digest_text'] = self._digest_text_cache.get(row['digest'], None)
saturated_rows.append(row)

return saturated_rows

def _filter_query_rows(self, rows):
# type: (List[PyMysqlRow]) -> List[PyMysqlRow]
"""
Filter out rows that are EXPLAIN statements
"""
return [row for row in rows if not row['digest_text'].lower().startswith('explain')]

def _normalize_queries(self, rows):
normalized_rows = []
for row in rows:
Expand Down

0 comments on commit 24120d6

Please sign in to comment.