Skip to content

Commit

Permalink
feat(deid): keep some fields from DiagnosticReport.presentedForm
Browse files Browse the repository at this point in the history
Specifically, treat it the same as we treat DocumentReference.content.
- Strip data & url into _data & _url data-absent-reason extensions.
- But keep all the interesting metadata like contentType and language.
  • Loading branch information
mikix committed Nov 20, 2024
1 parent 3e6a17e commit 68084dc
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 8 deletions.
2 changes: 1 addition & 1 deletion cumulus_etl/deid/ms-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@
{"path": "DiagnosticReport.media.link", "method": "keep"},
// Skip DiagnosticReport.conclusion
{"path": "DiagnosticReport.conclusionCode", "method": "keep"},
// Skip DiagnosticReport.presentedForm (can add back later when/if we want to run NLP on it)
{"path": "DiagnosticReport.presentedForm", "method": "keep"}, // will be dropped later after running NLP on it

// ** DocumentReference: https://www.hl7.org/fhir/R4/documentreference.html **
// Skip DocumentReference.masterIdentifier
Expand Down
12 changes: 7 additions & 5 deletions cumulus_etl/deid/scrubber.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,12 +436,14 @@ def _check_text(self, key: str, value: Any) -> Any:
@staticmethod
def _check_attachments(resource_type: str, node_path: str, key: str, value: Any) -> Any:
"""Strip any attachment data"""
if (
resource_type == "DocumentReference"
and node_path == "root.content.attachment"
and key in {"data", "url"}
if any(
(
(resource_type == "DiagnosticReport" and node_path == "root.presentedForm"),
(resource_type == "DocumentReference" and node_path == "root.content.attachment"),
)
):
raise MaskValue
if key in {"data", "url"}:
raise MaskValue

return value

Expand Down
2 changes: 1 addition & 1 deletion tests/data/mstool/input/DiagnosticReport.ndjson
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,5 @@
}],
"conclusion" : "dropped",
"conclusionCode" : [{ "text": "kept" }],
"presentedForm" : [{ "title": "dropped" }]
"presentedForm" : [{ "data": "xxx", "title": "dropped" }]
}
3 changes: 2 additions & 1 deletion tests/data/mstool/output/DiagnosticReport.ndjson
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,6 @@
"media" : [{
"link" : { "reference": "Media/x" }
}],
"conclusionCode" : [{ "text": "kept" }]
"conclusionCode" : [{ "text": "kept" }],
"presentedForm" : [{ "data": "xxx" }]
}
39 changes: 39 additions & 0 deletions tests/deid/test_deid_scrubber.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,45 @@ def test_condition(self):
f"Encounter/{scrubber.codebook.fake_id('Encounter', '67890')}",
)

def test_diagnosticreport(self):
    """
    Check that scrubbing a DiagnosticReport masks attachment payloads in presentedForm.

    The `data` and `url` fields of each presentedForm entry are expected to be replaced by
    `_data` / `_url` entries holding MASKED_EXTENSION, while the sibling metadata
    (`language`, `size`, `contentType`) is expected to come through untouched.
    """
    # One attachment carrying inline data, one pointing at an external URL.
    inline_form = {"data": "blarg", "language": "en", "size": 5}
    linked_form = {"url": "https://example.com/", "contentType": "text/plain"}
    report = {
        "resourceType": "DiagnosticReport",
        "id": "dr1",
        "presentedForm": [inline_form, linked_form],
    }

    scrubber = Scrubber()
    scrub_result = scrubber.scrub_resource(report)
    self.assertTrue(scrub_result)

    # Built only after scrubbing, so the codebook lookup happens at the same point as before.
    expected_report = {
        "resourceType": "DiagnosticReport",
        "id": scrubber.codebook.fake_id("DiagnosticReport", "dr1"),
        "presentedForm": [
            {"_data": MASKED_EXTENSION, "language": "en", "size": 5},
            {"_url": MASKED_EXTENSION, "contentType": "text/plain"},
        ],
    }
    self.assertEqual(report, expected_report)

def test_documentreference(self):
"""Test DocumentReference, which is interesting because of its list of encounters and attachments"""
docref = {
Expand Down

0 comments on commit 68084dc

Please sign in to comment.