From 7ad9553051e0496bf75108fde442bfa4a7f14bfb Mon Sep 17 00:00:00 2001 From: Graham Beckley Date: Wed, 24 Jul 2024 14:00:41 -0400 Subject: [PATCH] Remove `trace` ability for logs --- ctms/config.py | 4 - ctms/routers/contacts.py | 29 +----- docs/deployment_guide.md | 11 --- tests/unit/routers/contacts/test_api_get.py | 36 -------- .../contacts/test_api_get_by_alt_id.py | 89 ------------------- tests/unit/routers/contacts/test_api_patch.py | 24 ----- tests/unit/routers/contacts/test_api_post.py | 33 ------- tests/unit/routers/contacts/test_api_put.py | 57 ------------ 8 files changed, 2 insertions(+), 281 deletions(-) diff --git a/ctms/config.py b/ctms/config.py index 0ddb24c0..602e0a42 100644 --- a/ctms/config.py +++ b/ctms/config.py @@ -1,5 +1,4 @@ import json -import re from datetime import timedelta from enum import Enum from functools import lru_cache @@ -8,9 +7,6 @@ from pydantic import BaseSettings, PostgresDsn -# If primary email matches, then add trace to logs -re_trace_email = re.compile(r".*\+trace-me-mozilla-.*@.*") - @lru_cache() def get_version(): diff --git a/ctms/routers/contacts.py b/ctms/routers/contacts.py index cda285a7..93bf61c9 100644 --- a/ctms/routers/contacts.py +++ b/ctms/routers/contacts.py @@ -8,7 +8,6 @@ from sqlalchemy.orm import Session from starlette import status -from ctms.config import re_trace_email from ctms.crud import ( create_contact, create_or_update_contact, @@ -161,13 +160,6 @@ def read_ctms_by_any_id( ) raise HTTPException(status_code=400, detail=detail) contacts = get_contacts_by_any_id(db, **ids) - traced = set() - for contact in contacts: - email = contact.email.primary_email - if re_trace_email.match(email): - traced.add(email) - if traced: - request.state.log_context["trace"] = ",".join(sorted(traced)) return [CTMSResponse(**contact.dict()) for contact in contacts] @@ -188,9 +180,6 @@ def read_ctms_by_email_id( api_client: ApiClientSchema = Depends(get_enabled_api_client), ): resp = get_ctms_response_or_404(db, email_id) - email = resp.email.primary_email - if re_trace_email.match(email): - request.state.log_context["trace"] = email return resp @@ -218,10 +207,6 @@ def create_ctms_contact( email_id = contact.email.email_id existing = get_contact_by_email_id(db, email_id) if existing: - email = existing.email.primary_email - if re_trace_email.match(email): - request.state.log_context["trace"] = email - request.state.log_context["trace_json"] = content_json if ContactInSchema(**existing.dict()).idempotent_equal(contact): response.headers["Location"] = f"/ctms/{email_id}" response.status_code = 200 @@ -238,10 +223,6 @@ def create_ctms_contact( response.headers["Location"] = f"/ctms/{email_id}" response.status_code = 201 resp_data = get_ctms_response_or_404(db=db, email_id=email_id) - email = resp_data.email.primary_email - if re_trace_email.match(email): - request.state.log_context["trace"] = email - request.state.log_context["trace_json"] = content_json return resp_data @@ -272,10 +253,7 @@ def create_or_update_ctms_contact( ) else: contact.email.email_id = email_id - email = contact.email.primary_email - if re_trace_email.match(email): - request.state.log_context["trace"] = email - request.state.log_context["trace_json"] = content_json + try: create_or_update_contact(db, email_id, contact, get_metrics()) db.commit() @@ -323,10 +301,7 @@ def partial_update_ctms_contact( current_email = get_email_or_404(db, email_id) update_data = contact.dict(exclude_unset=True) update_contact(db, current_email, update_data, get_metrics()) - email = current_email.primary_email - if re_trace_email.match(email): - request.state.log_context["trace"] = email - request.state.log_context["trace_json"] = content_json + try: db.commit() except Exception as e: # pylint:disable = W0703 diff --git a/docs/deployment_guide.md b/docs/deployment_guide.md index e8d63f0d..9555bc7a 100644 --- a/docs/deployment_guide.md +++ b/docs/deployment_guide.md @@ -78,13 +78,6 @@ Some headers contain security sensitive information, such as client credentials access token. These values are replaced by ``[OMITTED]``. Similar replacement removes emails from the query string (when parsed). -If tracing by email is desired, a tester can trace their activity in the system -by adding the text ``+trace_me_mozilla_`` to their email, such as -``test+trace_me_mozilla_20211207@example.com``. This works with the -[plus-sign trick](https://gmail.googleblog.com/2008/03/2-hidden-ways-to-get-more-from-your.html) -in Gmail and other email providers to create a unique email address. -This causes the email to appear in logs, along with further request context -for some endpoints. ### API Logging Fields @@ -114,10 +107,6 @@ traceback) are logged at ``ERROR`` level (Severity 3). Some of the fields are: * ``token_creds_from``: For ``/token``, if the credentials were read from the ``Authentication`` header ("header") or from the form-encoded body ("form") * ``token_fail``: For ``/token``, why the token request failed -* ``trace_json``: The request payload if tracing via the email address is - requested. -* ``trace``: An email matching the trace pattern, such as - ``test+trace_me_mozilla_2021@example.com`` ## Metrics diff --git a/tests/unit/routers/contacts/test_api_get.py b/tests/unit/routers/contacts/test_api_get.py index 7a9b34da..0af62fe8 100644 --- a/tests/unit/routers/contacts/test_api_get.py +++ b/tests/unit/routers/contacts/test_api_get.py @@ -1,11 +1,6 @@ """Unit tests for GET /ctms/{email_id}""" -from uuid import uuid4 - import pytest -from structlog.testing import capture_logs - -from ctms.models import Email def test_get_ctms_for_minimal_contact(client, dbsession, email_factory): @@ -359,37 +354,6 @@ def test_get_ctms_not_found(client, dbsession): assert resp.json() == {"detail": "Unknown email_id"} -def test_get_ctms_not_traced(client, example_contact): - """Most CTMS contacts are not traced.""" - email_id = example_contact.email.email_id - with capture_logs() as caplog: - resp = client.get(f"/ctms/{email_id}") - assert resp.status_code == 200 - assert len(caplog) == 1 - assert "trace" not in caplog[0] - - -def test_get_ctms_with_tracing(client, dbsession): - """The log parameter trace is set when a traced email is requested.""" - email_id = uuid4() - email = "test+trace-me-mozilla-123@example.com" - record = Email( - email_id=email_id, - primary_email=email, - double_opt_in=False, - email_format="T", - has_opted_out_of_email=False, - ) - dbsession.add(record) - dbsession.commit() - with capture_logs() as caplog: - resp = client.get(f"/ctms/{email_id}") - assert resp.status_code == 200 - assert len(caplog) == 1 - assert caplog[0]["trace"] == email - assert "trace_json" not in caplog[0] - - @pytest.mark.parametrize("waitlist_name", ["vpn", "relay"]) def test_get_ctms_without_geo_in_waitlist( waitlist_name, client, dbsession, waitlist_factory diff --git a/tests/unit/routers/contacts/test_api_get_by_alt_id.py b/tests/unit/routers/contacts/test_api_get_by_alt_id.py index d26c99a3..9be6519c 100644 --- a/tests/unit/routers/contacts/test_api_get_by_alt_id.py +++ b/tests/unit/routers/contacts/test_api_get_by_alt_id.py @@ -1,11 +1,6 @@ """Unit tests for GET /ctms?alt_id=value, returning list of contacts""" -from uuid import uuid4 - import pytest -from structlog.testing import capture_logs - -from ctms.models import AmoAccount, Email, MozillaFoundationContact @pytest.mark.parametrize( @@ -72,87 +67,3 @@ def test_get_ctms_by_alt_id_none_found(client, dbsession, alt_id_name, alt_id_va assert resp.status_code == 200 data = resp.json() assert len(data) == 0 - - -def test_get_not_traced(client, example_contact): - """Most CTMS contacts are not traced.""" - params = {"primary_email": example_contact.email.primary_email} - with capture_logs() as caplog: - resp = client.get("/ctms", params=params) - assert resp.status_code == 200 - data = resp.json() - assert len(data) == 1 - assert len(caplog) == 1 - assert "trace" not in caplog[0] - - -def test_get_with_tracing(client, dbsession, maximal_contact): - """The log parameter trace is set when a traced email is requested.""" - email_id = uuid4() - mofo_contact_id = maximal_contact.mofo.mofo_contact_id - email = "test+trace-me-mozilla-123@example.com" - record = Email( - email_id=email_id, - primary_email=email, - double_opt_in=False, - email_format="T", - has_opted_out_of_email=False, - ) - mofo = MozillaFoundationContact( - email_id=email_id, - mofo_relevant=True, - mofo_contact_id=mofo_contact_id, - mofo_email_id=uuid4(), - ) - dbsession.add(record) - dbsession.add(mofo) - dbsession.commit() - with capture_logs() as caplog: - resp = client.get("/ctms", params={"mofo_contact_id": str(mofo_contact_id)}) - assert resp.status_code == 200 - data = resp.json() - assert len(data) == 2 - assert len(caplog) == 1 - assert caplog[0]["trace"] == email - assert "trace_json" not in caplog[0] - - -def test_get_multiple_with_tracing(client, dbsession): - """Multiple traced emails are comma-joined.""" - email_id1 = uuid4() - email1 = "test+trace-me-mozilla-1@example.com" - dbsession.add( - Email( - email_id=email_id1, - primary_email=email1, - double_opt_in=False, - email_format="T", - has_opted_out_of_email=False, - ) - ) - dbsession.add( - AmoAccount(email_id=email_id1, user_id="amo123", email_opt_in=False, user=True) - ) - email_id2 = uuid4() - email2 = "test+trace-me-mozilla-2@example.com" - dbsession.add( - Email( - email_id=email_id2, - primary_email=email2, - double_opt_in=False, - email_format="T", - has_opted_out_of_email=False, - ) - ) - dbsession.add( - AmoAccount(email_id=email_id2, user_id="amo123", email_opt_in=True, user=True) - ) - dbsession.commit() - with capture_logs() as caplog: - resp = client.get("/ctms", params={"amo_user_id": "amo123"}) - assert resp.status_code == 200 - data = resp.json() - assert len(data) == 2 - assert len(caplog) == 1 - assert caplog[0]["trace"] == f"{email1},{email2}" - assert "trace_json" not in caplog[0] diff --git a/tests/unit/routers/contacts/test_api_patch.py b/tests/unit/routers/contacts/test_api_patch.py index d5d809ef..01cfa8be 100644 --- a/tests/unit/routers/contacts/test_api_patch.py +++ b/tests/unit/routers/contacts/test_api_patch.py @@ -4,7 +4,6 @@ from uuid import uuid4 import pytest -from structlog.testing import capture_logs from ctms.schemas import ( AddOnsInSchema, @@ -421,29 +420,6 @@ def test_patch_to_delete_deleted_group(client, minimal_contact): assert actual["mofo"] == default_mofo -def test_patch_no_trace(client, minimal_contact): - """PATCH does not trace most contacts""" - email_id = minimal_contact.email.email_id - patch_data = {"email": {"first_name": "Jeff"}} - with capture_logs() as caplogs: - resp = client.patch(f"/ctms/{email_id}", json=patch_data) - assert resp.status_code == 200 - assert len(caplogs) == 1 - assert "trace" not in caplogs[0] - - -def test_patch_with_trace(client, minimal_contact): - """PATCH traces by email""" - email_id = minimal_contact.email.email_id - patch_data = {"email": {"primary_email": "jeff+trace-me-mozilla-1@example.com"}} - with capture_logs() as caplogs: - resp = client.patch(f"/ctms/{email_id}", json=patch_data) - assert resp.status_code == 200 - assert len(caplogs) == 1 - assert caplogs[0]["trace"] == "jeff+trace-me-mozilla-1@example.com" - assert caplogs[0]["trace_json"] == patch_data - - def test_patch_will_validate_waitlist_fields(client, maximal_contact): """PATCH validates waitlist schema.""" email_id = maximal_contact.email.email_id diff --git a/tests/unit/routers/contacts/test_api_post.py b/tests/unit/routers/contacts/test_api_post.py index 36f4952e..7a32eaf7 100644 --- a/tests/unit/routers/contacts/test_api_post.py +++ b/tests/unit/routers/contacts/test_api_post.py @@ -3,7 +3,6 @@ from uuid import UUID import pytest -from structlog.testing import capture_logs from tests.unit.conftest import SAMPLE_CONTACT_PARAMS @@ -120,16 +119,6 @@ def _change_primary_email(contact): _compare_written_contacts(saved_contacts[0], orig_sample, email_id) -def test_create_without_trace(client): - """Most contacts are not traced.""" - data = {"email": {"primary_email": "test+no-trace@example.com"}} - with capture_logs() as cap_logs: - resp = client.post("/ctms", json=data) - assert resp.status_code == 201 - assert len(cap_logs) == 1 - assert "trace" not in cap_logs[0] - - def test_create_with_non_json_is_error(client): """When non-JSON is posted /ctms, a 422 is returned""" data = b"this is not JSON" @@ -139,25 +128,3 @@ def test_create_with_non_json_is_error(client): assert resp.json()["detail"][0]["msg"] == "JSON decode error" assert len(cap_logs) == 1 assert "trace" not in cap_logs[0] - - -def test_create_with_trace(client, status_code=201): - """A contact is traced by email.""" - data = { - "email": { - "email_id": "7eec2254-fba7-485b-a921-6308c929dd25", - "primary_email": "test+trace-me-mozilla-May-2021@example.com", - } - } - with capture_logs() as cap_logs: - resp = client.post("/ctms", json=data) - assert resp.status_code == status_code - assert len(cap_logs) == 1 - assert cap_logs[0]["trace"] == "test+trace-me-mozilla-May-2021@example.com" - assert cap_logs[0]["trace_json"] == data - - -def test_recreate_with_trace(client): - """A idempotent re-create is traced by email.""" - test_create_with_trace(client) - test_create_with_trace(client, 200) diff --git a/tests/unit/routers/contacts/test_api_put.py b/tests/unit/routers/contacts/test_api_put.py index 4e73d773..39626a74 100644 --- a/tests/unit/routers/contacts/test_api_put.py +++ b/tests/unit/routers/contacts/test_api_put.py @@ -1,6 +1,5 @@ """Unit tests for PUT /ctms/{email_id} (Create or update)""" -import json from uuid import UUID, uuid4 import pytest @@ -123,31 +122,6 @@ def _change_primary_email(contact): _compare_written_contacts(saved_contacts[0], orig_sample, email_id) -def test_put_create_no_trace(client, dbsession): - """PUT does not trace most new contacts""" - email_id = str(uuid4()) - data = { - "email": {"email_id": email_id, "primary_email": "test+no-trace@example.com"} - } - with capture_logs() as caplogs: - resp = client.put(f"/ctms/{email_id}", json=data) - assert resp.status_code == 201 - assert len(caplogs) == 1 - assert "trace" not in caplogs[0] - - -def test_put_replace_no_trace(client, minimal_contact): - """PUT does not trace most replaced contacts""" - email_id = minimal_contact.email.email_id - data = json.loads(minimal_contact.json()) - data["email"]["first_name"] = "Jeff" - with capture_logs() as caplogs: - resp = client.put(f"/ctms/{email_id}", json=data) - assert resp.status_code == 201 - assert len(caplogs) == 1 - assert "trace" not in caplogs[0] - - def test_put_with_not_json_is_error(client, dbsession): """Calling PUT with a text body is a 422 validation error.""" email_id = str(uuid4()) @@ -158,34 +132,3 @@ def test_put_with_not_json_is_error(client, dbsession): assert resp.json()["detail"][0]["msg"] == "JSON decode error" assert len(caplogs) == 1 assert "trace" not in caplogs[0] - - -def test_put_create_with_trace(client, dbsession): - """PUT traces new contacts by email address""" - email_id = str(uuid4()) - data = { - "email": { - "email_id": email_id, - "primary_email": "test+trace-me-mozilla-2021-05-13@example.com", - } - } - with capture_logs() as caplogs: - resp = client.put(f"/ctms/{email_id}", json=data) - assert resp.status_code == 201 - assert len(caplogs) == 1 - assert caplogs[0]["trace"] == "test+trace-me-mozilla-2021-05-13@example.com" - assert caplogs[0]["trace_json"] == data - - -def test_put_replace_with_trace(client, minimal_contact): - """PUT traces replaced contacts by email""" - email_id = minimal_contact.email.email_id - data = json.loads(minimal_contact.json()) - data["email"]["first_name"] = "Jeff" - data["email"]["primary_email"] = "test+trace-me-mozilla-2021-05-13@example.com" - with capture_logs() as caplogs: - resp = client.put(f"/ctms/{email_id}", json=data) - assert resp.status_code == 201 - assert len(caplogs) == 1 - assert caplogs[0]["trace"] == "test+trace-me-mozilla-2021-05-13@example.com" - assert caplogs[0]["trace_json"] == data