-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1379 from GSA/API-1328_Logging_Formatter_With_Scrub
API-1328 - Using a custom formatter to scrub PII from all log records.
- Loading branch information
Showing
2 changed files
with
1,285 additions
and
1,135 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ | |
import re | ||
import sys | ||
from itertools import product | ||
from typing import Any, override | ||
|
||
from flask import g, request | ||
from flask.ctx import has_app_context, has_request_context | ||
|
@@ -17,6 +18,40 @@ | |
|
||
logger = logging.getLogger(__name__) | ||
|
||
# Optional leading "+", then a digit, at least seven digits/dashes/spaces, and
# a final digit -- e.g. "2025550100", "202-555-0100" or "+1 202 555 0100".
_phone_regex = re.compile("(?:\\+ *)?\\d[\\d\\- ]{7,}\\d")
# Anything shaped like local@domain, e.g. "alice@example.com".
_email_regex = re.compile(r"[\w\.-]+@[\w\.-]+")

_PHONE_MASK = "1XXXXXXXXXX"
_EMAIL_MASK = "XXXXX@XXXXXXX"


def _scrub(msg: Any) -> Any:
    """Mask phone numbers and email addresses found in a log message.

    Args:
        msg: The log message. Non-string values (for example an exception
            object passed directly as the message) are returned untouched.

    Returns:
        ``msg`` with every phone-pattern match replaced by ``1XXXXXXXXXX``
        and every email-pattern match replaced by ``XXXXX@XXXXXXX``; the
        original object when ``msg`` is not a ``str``.

    Note:
        The phone pattern is deliberately loose, so other long runs of
        digits separated by dashes or spaces (some timestamps, IDs) are
        masked too. Over-masking is preferred to leaking a number.
    """
    # Sometimes just an exception object is passed in for the message, skip those.
    if not isinstance(msg, str):
        return msg

    # Substitute on the text exactly as it was matched. Stripping dashes and
    # spaces from the match before replacing meant that numbers written with
    # separators were detected but never actually removed from the message.
    msg = _phone_regex.sub(_PHONE_MASK, msg)

    # A single regex pass also avoids partially masking an address whose
    # suffix happens to be another address found in the same message.
    return _email_regex.sub(_EMAIL_MASK, msg)
|
||
|
||
class PIIFilter(logging.Filter):
    """Logging filter that rewrites each record's message through ``_scrub``."""

    @override
    def filter(self, record: logging.LogRecord) -> logging.LogRecord:
        """Scrub ``record.msg`` in place and hand the same record back.

        Only ``record.msg`` is touched; the record object itself is returned
        rather than a boolean.
        """
        scrubbed = _scrub(record.msg)
        record.msg = scrubbed
        return record
|
||
|
||
class PIIFormatter(logging.Formatter):
    """Formatter that scrubs the record's message before normal formatting."""

    def format(self, record: logging.LogRecord) -> str:
        """Return the formatted record after passing ``record.msg`` through ``_scrub``.

        The record is mutated: its ``msg`` attribute is overwritten with the
        scrubbed value before the base formatter runs.
        """
        # NOTE(review): only record.msg is scrubbed here; values supplied via
        # record.args are not passed through _scrub -- confirm callers never
        # log PII as %-style arguments.
        record.msg = _scrub(record.msg)
        formatted = super().format(record)
        return formatted
|
||
|
||
def init_app(app): | ||
app.config.setdefault("NOTIFY_LOG_LEVEL", "INFO") | ||
|
@@ -50,7 +85,7 @@ def init_app(app): | |
|
||
def get_handlers(app): | ||
handlers = [] | ||
standard_formatter = logging.Formatter(LOG_FORMAT, TIME_FORMAT) | ||
standard_formatter = PIIFormatter(LOG_FORMAT, TIME_FORMAT) | ||
json_formatter = JSONFormatter(LOG_FORMAT, TIME_FORMAT) | ||
|
||
stream_handler = logging.StreamHandler(sys.stdout) | ||
|
@@ -123,36 +158,6 @@ def filter(self, record): | |
return record | ||
|
||
|
||
class PIIFilter(logging.Filter):
    """Logging filter that masks phone numbers and email addresses in messages."""

    def scrub(self, msg):
        """Return ``msg`` with phone numbers and email addresses masked.

        Args:
            msg: The log message. Anything that is not a ``str`` (for example
                an exception object passed as the message) is returned as-is.

        Returns:
            The message with phone-pattern matches replaced by
            ``1XXXXXXXXXX`` and email-pattern matches replaced by
            ``XXXXX@XXXXXXX``.
        """
        # Eventually we want to scrub all messages in all logs for phone numbers
        # and email addresses, masking them. Ultimately this will probably get
        # refactored into a 'SafeLogger' subclass or something, but let's start here
        # with phones.

        # Sometimes just an exception object is passed in for the message, skip those.
        if not isinstance(msg, str):
            return msg

        # Replace the text exactly as matched. Normalising the match (dropping
        # dashes and spaces) before calling str.replace meant numbers written
        # with separators were found but never masked.
        msg = re.sub("(?:\\+ *)?\\d[\\d\\- ]{7,}\\d", "1XXXXXXXXXX", msg)

        # e.g. 'alice@example.com', 'bob@example.org'
        return re.sub(r"[\w\.-]+@[\w\.-]+", "XXXXX@XXXXXXX", msg)

    def filter(self, record):
        """Scrub ``record.msg`` in place and return the record."""
        record.msg = self.scrub(record.msg)
        return record
|
||
|
||
class JSONFormatter(BaseJSONFormatter): | ||
def process_log_record(self, log_record): | ||
rename_map = { | ||
|
@@ -166,6 +171,7 @@ def process_log_record(self, log_record): | |
log_record["logType"] = "application" | ||
try: | ||
log_record["message"] = log_record["message"].format(**log_record) | ||
log_record["message"] = _scrub(log_record["message"]) # PII Scrubbing | ||
except KeyError as e: | ||
# We get occasional log messages that are nested dictionaries, | ||
# for example, delivery receipts, where the formatting fails | ||
|
Oops, something went wrong.