From 7a36f7eefb27972d0bb92909aaa6ccd50c80ea3c Mon Sep 17 00:00:00 2001 From: Michael Terry Date: Wed, 10 Jan 2024 12:59:36 -0500 Subject: [PATCH] feat!: rename chart-review to upload-notes "chart-review" is an overloaded term and conflicts with the name of some tooling we are also crafting for help with post-review statistics. So let's use upload-notes instead, which is more descriptive of what we are actually doing with the command. chart-review will stay as an alias for now. --- compose.yaml | 10 +++ cumulus_etl/chart_review/__init__.py | 3 - cumulus_etl/cli.py | 14 ++- cumulus_etl/deid/philter-config.toml | 2 +- cumulus_etl/fhir/fhir_utils.py | 4 +- cumulus_etl/upload_notes/__init__.py | 3 + .../{chart_review => upload_notes}/cli.py | 18 ++-- .../downloader.py | 0 .../labelstudio.py | 0 .../selector.py | 0 docs/chart-review.md | 90 +++++++++---------- tests/test_cli.py | 9 +- .../__init__.py | 0 .../test_upload_cli.py} | 48 +++++----- .../test_upload_labelstudio.py} | 8 +- 15 files changed, 114 insertions(+), 95 deletions(-) delete mode 100644 cumulus_etl/chart_review/__init__.py create mode 100644 cumulus_etl/upload_notes/__init__.py rename cumulus_etl/{chart_review => upload_notes}/cli.py (95%) rename cumulus_etl/{chart_review => upload_notes}/downloader.py (100%) rename cumulus_etl/{chart_review => upload_notes}/labelstudio.py (100%) rename cumulus_etl/{chart_review => upload_notes}/selector.py (100%) rename tests/{chart_review => upload_notes}/__init__.py (100%) rename tests/{chart_review/test_chart_cli.py => upload_notes/test_upload_cli.py} (91%) rename tests/{chart_review/test_chart_labelstudio.py => upload_notes/test_upload_labelstudio.py} (96%) diff --git a/compose.yaml b/compose.yaml index ff6038c5..0c2aa62a 100644 --- a/compose.yaml +++ b/compose.yaml @@ -51,24 +51,34 @@ services: ctakes-covid: extends: ctakes-covid-base profiles: + # chart-review is a deprecated alias for upload-notes since Jan 2024. + # Remove when you feel like it. - chart-review - chart-review-gpu - covid-symptom - covid-symptom-gpu + - upload-notes + - upload-notes-gpu cnlpt-negation: image: smartonfhir/cnlp-transformers:negation-0.6.1-cpu profiles: + # chart-review is a deprecated alias for upload-notes since Jan 2024. + # Remove when you feel like it. - chart-review - covid-symptom + - upload-notes networks: - cumulus-etl cnlpt-negation-gpu: image: smartonfhir/cnlp-transformers:negation-0.6.1-gpu profiles: + # chart-review-gpu is a deprecated alias for upload-notes-gpu since Jan 2024. + # Remove when you feel like it. - chart-review-gpu - covid-symptom-gpu + - upload-notes-gpu networks: - cumulus-etl deploy: diff --git a/cumulus_etl/chart_review/__init__.py b/cumulus_etl/chart_review/__init__.py deleted file mode 100644 index 441f1554..00000000 --- a/cumulus_etl/chart_review/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -"""Chart review""" - -from .cli import run_chart_review diff --git a/cumulus_etl/cli.py b/cumulus_etl/cli.py index 1aa089a7..865e6dd5 100644 --- a/cumulus_etl/cli.py +++ b/cumulus_etl/cli.py @@ -9,14 +9,20 @@ import rich.logging -from cumulus_etl import chart_review, common, etl +from cumulus_etl import common, etl, upload_notes from cumulus_etl.etl import convert class Command(enum.Enum): + """Subcommand strings""" + + # chart-review is a deprecated alias of upload-notes since Jan 2024. + # Keep as long as you like. + # It's a low-usage feature, but it's not a maintenance burden to keep this around. 
CHART_REVIEW = "chart-review" CONVERT = "convert" ETL = "etl" + UPLOAD_NOTES = "upload-notes" # Why isn't this part of Enum directly...? @classmethod @@ -56,8 +62,8 @@ async def main(argv: list[str]) -> None: prog += f" {subcommand}" # to make --help look nicer parser = argparse.ArgumentParser(prog=prog) - if subcommand == Command.CHART_REVIEW.value: - run_method = chart_review.run_chart_review + if subcommand in {Command.CHART_REVIEW.value, Command.UPLOAD_NOTES.value}: + run_method = upload_notes.run_upload_notes elif subcommand == Command.CONVERT.value: run_method = convert.run_convert else: @@ -65,7 +71,7 @@ async def main(argv: list[str]) -> None: if not subcommand: # Add a note about other subcommands we offer, and tell argparse not to wrap our formatting parser.formatter_class = argparse.RawDescriptionHelpFormatter - parser.description += "\n\n" "other commands available:\n" " chart-review\n" " convert" + parser.description += "\n\n" "other commands available:\n" " convert\n" " upload-notes" run_method = etl.run_etl with tempfile.TemporaryDirectory() as tempdir: diff --git a/cumulus_etl/deid/philter-config.toml b/cumulus_etl/deid/philter-config.toml index 5469fca3..1bd2cd3c 100644 --- a/cumulus_etl/deid/philter-config.toml +++ b/cumulus_etl/deid/philter-config.toml @@ -5,7 +5,7 @@ # - Removed all DATE filters. # This is because Cumulus in general finds accurate dates to be useful. # For example, we don't remove dates in the MS anonymizer config, and we also want dates for -# non-ETL activities like chart review. +# non-ETL activities like upload-notes. [[filters]] title = "ucsf apex safe" diff --git a/cumulus_etl/fhir/fhir_utils.py b/cumulus_etl/fhir/fhir_utils.py index 0ad6b790..d2f9cec3 100644 --- a/cumulus_etl/fhir/fhir_utils.py +++ b/cumulus_etl/fhir/fhir_utils.py @@ -211,10 +211,10 @@ async def get_docref_note(client: FhirClient, docref: dict) -> str: if best_attachment_mimetype in ("text/html", "application/xhtml+xml"): # An HTML note can confuse/stall cTAKES and also makes philtering difficult. # It may include mountains of spans/styling or inline base64 images that aren't relevant to our interests. - # Chart Review and ETL modes thus both prefer to work with plain text. + # Upload Notes and ETL modes thus both prefer to work with plain text. # # Inscriptis makes a very readable version of the note, with a focus on maintaining the HTML layout, - # which is especially helpful for chart-review (and maybe also helps NLP by avoiding odd line breaks). + # which is especially helpful for upload-notes (and maybe also helps NLP by avoiding odd line breaks). note = inscriptis.get_text(note) # Strip this "line feed" character that often shows up in notes and is confusing for NLP. 
diff --git a/cumulus_etl/upload_notes/__init__.py b/cumulus_etl/upload_notes/__init__.py new file mode 100644 index 00000000..f03e434b --- /dev/null +++ b/cumulus_etl/upload_notes/__init__.py @@ -0,0 +1,3 @@ +"""upload-notes""" + +from .cli import run_upload_notes diff --git a/cumulus_etl/chart_review/cli.py b/cumulus_etl/upload_notes/cli.py similarity index 95% rename from cumulus_etl/chart_review/cli.py rename to cumulus_etl/upload_notes/cli.py index cb7f1c33..e179d294 100644 --- a/cumulus_etl/chart_review/cli.py +++ b/cumulus_etl/upload_notes/cli.py @@ -9,8 +9,8 @@ from ctakesclient.typesystem import Polarity from cumulus_etl import cli_utils, common, deid, errors, fhir, nlp, store -from cumulus_etl.chart_review import downloader, selector -from cumulus_etl.chart_review.labelstudio import LabelStudioClient, LabelStudioNote +from cumulus_etl.upload_notes import downloader, selector +from cumulus_etl.upload_notes.labelstudio import LabelStudioClient, LabelStudioNote def init_checks(args: argparse.Namespace): @@ -198,8 +198,8 @@ def push_to_label_studio( ##################################################################################################################### -def define_chart_review_parser(parser: argparse.ArgumentParser) -> None: - parser.usage = "%(prog)s [OPTION]... INPUT LS_URL PHI" +def define_upload_notes_parser(parser: argparse.ArgumentParser) -> None: + parser.usage = "cumulus-etl upload-notes [OPTION]... INPUT LS_URL PHI" parser.add_argument("dir_input", metavar="/path/to/input") parser.add_argument("label_studio_url", metavar="https://example.com/labelstudio") @@ -236,7 +236,7 @@ def define_chart_review_parser(parser: argparse.ArgumentParser) -> None: cli_utils.add_debugging(parser) -async def chart_review_main(args: argparse.Namespace) -> None: +async def upload_notes_main(args: argparse.Namespace) -> None: """ Prepare for chart review by uploading some documents to Label Studio. @@ -267,8 +267,8 @@ async def chart_review_main(args: argparse.Namespace) -> None: push_to_label_studio(notes, access_token, labels, args) -async def run_chart_review(parser: argparse.ArgumentParser, argv: list[str]) -> None: - """Parses a chart review CLI""" - define_chart_review_parser(parser) +async def run_upload_notes(parser: argparse.ArgumentParser, argv: list[str]) -> None: + """Parses an upload-notes CLI""" + define_upload_notes_parser(parser) args = parser.parse_args(argv) - await chart_review_main(args) + await upload_notes_main(args) diff --git a/cumulus_etl/chart_review/downloader.py b/cumulus_etl/upload_notes/downloader.py similarity index 100% rename from cumulus_etl/chart_review/downloader.py rename to cumulus_etl/upload_notes/downloader.py diff --git a/cumulus_etl/chart_review/labelstudio.py b/cumulus_etl/upload_notes/labelstudio.py similarity index 100% rename from cumulus_etl/chart_review/labelstudio.py rename to cumulus_etl/upload_notes/labelstudio.py diff --git a/cumulus_etl/chart_review/selector.py b/cumulus_etl/upload_notes/selector.py similarity index 100% rename from cumulus_etl/chart_review/selector.py rename to cumulus_etl/upload_notes/selector.py diff --git a/docs/chart-review.md b/docs/chart-review.md index 0392aab6..dccb4e4d 100644 --- a/docs/chart-review.md +++ b/docs/chart-review.md @@ -8,7 +8,9 @@ nav_order: 20 # Chart Review -Cumulus ETL also offers a chart review mode, +Chart review is a critical part of study validation. 
+ +Cumulus ETL offers an upload mode, where it sends clinical notes to your own [Label Studio](https://labelstud.io/) instance for expert review. Along the way, it can mark the note with NLP results and/or anonymize the note with @@ -27,31 +29,31 @@ They offer Docker images and reasonable If you haven't set that up yet, go do that and come back. The Cumulus team can help you with setting it up if you come talk to us, -but the rest of this guide will mostly deal with chart review mode itself. +but the rest of this guide will mostly deal with the `upload-notes` mode itself. ### Dependent Services -Some features of chart review mode need external services (like cTAKES to run NLP). -Launch those before you begin using chart review: +Some features of upload mode need external services (like cTAKES to run NLP). +Launch those before you begin: ```shell export UMLS_API_KEY=your-umls-api-key -docker compose --profile chart-review up -d +docker compose --profile upload-notes up -d ``` Or if you have access to a GPU, -you can speed up the NLP by launching the GPU profile instead with `--profile chart-review-gpu`. +you can speed up the NLP by launching the GPU profile instead with `--profile upload-notes-gpu`. ## Basic Operation -At its core, chart review mode is just another ETL (extract, transform, load) operation. +At its core, upload mode is just another ETL (extract, transform, load) operation. 1. It extracts DocumentReference resources from your EHR. 2. It transforms the contained notes via NLP & `philter`. 3. It loads the results into Label Studio. ### Minimal Command Line -Chart review mode takes three main arguments: +Upload mode takes three main arguments: 1. Input path (local dir of ndjson or a FHIR server to perform a bulk export on) 2. URL for Label Studio 3. PHI/build path (the same PHI/build path you normally provide to Cumulus ETL) @@ -60,12 +62,12 @@ Additionally, there are two required Label Studio parameters: 1. `--ls-token PATH` (a file holding your Label Studio authentication token) 2. `--ls-project ID` (the number of the Label Studio project you want to push notes to) -Taken altogether, here is an example minimal chart review command: +Taken altogether, here is an example minimal `upload-notes` command: ```sh docker compose run \ --volume /local/path:/in \ cumulus-etl \ - chart-review \ + upload-notes \ --ls-token /in/label-studio-token.txt \ --ls-project 3 \ https://my-ehr-server/R4/12345/Group/67890 \ @@ -80,7 +82,7 @@ and then push the results to your Label Studio project number `3`. ### Grouping by Encounter -Chart review mode will group all notes by encounter and present them together as a single +Upload mode will group all notes by encounter and present them together as a single Label Studio artifact. Each clinical note will have a little header describing what type of note it is ("Admission MD"), @@ -89,20 +91,20 @@ to make it easier to reference back to your EHR or Athena data. ## Bulk Export Options -You can point chart review mode at either a folder with DocumentReference ndjson files +You can point upload mode at either a folder with DocumentReference ndjson files or your EHR server (in which case it will do a bulk export from the target Group). -Chart review mode takes all the same [bulk export options](bulk-exports.md) that the normal +Upload mode takes all the same [bulk export options](bulk-exports.md) that the normal ETL mode supports. 
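If you are pointing upload mode at a local folder, a quick standalone check like the sketch below can confirm how many notes will be grouped under each encounter before anything is uploaded. (This script is not part of Cumulus ETL; the folder path is the example from the docker command above, and it assumes the export files end in `.ndjson`.)

```python
# Sketch: count DocumentReferences per encounter in a local ndjson folder.
# Not part of Cumulus ETL; adjust the folder path and file pattern for your own data.
import collections
import json
import pathlib

folder = pathlib.Path("/local/path")  # example path, matching the docker example above
counts = collections.Counter()

for ndjson_file in folder.glob("*.ndjson"):
    with ndjson_file.open() as f:
        for line in f:
            if not line.strip():
                continue
            resource = json.loads(line)
            if resource.get("resourceType") != "DocumentReference":
                continue
            # DocumentReference.context.encounter is a list of FHIR references
            encounters = resource.get("context", {}).get("encounter", [])
            ref = encounters[0].get("reference", "(no reference)") if encounters else "(no encounter)"
            counts[ref] += 1

for encounter_ref, count in counts.most_common():
    print(f"{encounter_ref}: {count} notes")
```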
Note that even if you provide a folder of DocumentReference ndjson resources, you will still likely need to pass `--fhir-url` and FHIR authentication options, -so that chart review mode can download the referenced clinical notes _inside_ the DocumentReference, +so that upload mode can download the referenced clinical notes _inside_ the DocumentReference, which usually hold an external URL rather than inline note data. ## Document Selection Options -By default, chart review mode will grab _all documents_ in the target Group or folder. +By default, upload mode will grab _all documents_ in the target Group or folder. But usually you will probably want to only select a few documents for testing purposes. More in the realm of 10-100 specific documents. @@ -123,14 +125,14 @@ docref_id Then pass in an argument like `--docrefs /in/docrefs.csv`. -Chart review mode will only export & process the specified documents, saving a lot of time. +Upload mode will only export & process the specified documents, saving a lot of time. ### By Anonymized ID If you are working with your existing de-identified limited data set in Athena, you will only have references to the anonymized document IDs and no direct clinical notes. But that's fine! -Chart review mode can use the PHI folder to grab the cached mappings of patient IDs +Upload mode can use the PHI folder to grab the cached mappings of patient IDs and then work to reverse-engineer the correct document IDs (to then download from the EHR). For this to work, you will need to provide both the anonymized docref ID **and** @@ -151,7 +153,7 @@ limit 10; You'll notice we are defining two columns: patient_id and docref_id (those must be the names). Then, pass in an argument like `--anon-docrefs /in/docrefs.csv`. -Chart review mode will reverse-engineer the original document IDs and export them from your EHR. +Upload mode will reverse-engineer the original document IDs and export them from your EHR. #### I Thought the Anonymized IDs Could Not Be Reversed? @@ -160,10 +162,10 @@ But Cumulus ETL saves a cache of all the IDs it makes for your patients (and enc You can see this cache in your PHI folder, named `codebook-cached-mappings.json`. (It's worth emphasizing that the contents of this file are never moved outside the PHI folder, -and are only used for chart review mode.) +and are only used for upload mode.) By using this mapping file, -chart review mode can find all the original patient IDs using the `patient_id` column you gave it. +Upload mode can find all the original patient IDs using the `patient_id` column you gave it. Once it has the original patients, it will ask the EHR for all of those patients' documents. And it will anonymize each document ID it sees. @@ -177,7 +179,7 @@ When it sees a match for one of the anonymous docref IDs you gave in the `docref It might be useful to save the exported documents from the EHR (or even the smaller selection from a giant ndjson folder), -for faster iterations of the chart review mode or +for faster iterations of the upload mode or just confirming the correct documents were chosen. Pass in an argument like `--export-to /in/export` to save the ndjson for the selected documents @@ -191,36 +193,34 @@ simply pass in a new dictionary like so: `--symptoms-bsv /in/my-symptoms.bsv`. 
This file should look like (this is a portion of the default Covid dictionary):
```
-## Columns = CUI|TUI|CODE|SAB|STR|PREF
+## Columns = CUI|TUI|STR|PREF
## CUI = Concept Unique Identifier
## TUI = Type Unique Identifier
-## CODE = Vocabulary Code
-## SAB = Vocabulary Source Abbreviation (SNOMEDCT_US)
## STR = String text in clinical note (case insensitive)
## PREF = Preferred output concept label

## Congestion or runny nose
-C0027424|T184|68235000|SNOMEDCT_US|nasal congestion|Congestion or runny nose
-C0027424|T184|68235000|SNOMEDCT_US|stuffed-up nose|Congestion or runny nose
-C0027424|T184|68235000|SNOMEDCT_US|stuffy nose|Congestion or runny nose
-C0027424|T184|68235000|SNOMEDCT_US|congested nose|Congestion or runny nose
-C1260880|T184|64531003|SNOMEDCT_US|rhinorrhea|Congestion or runny nose
-C1260880|T184|64531003|SNOMEDCT_US|Nasal discharge|Congestion or runny nose
-C1260880|T184|64531003|SNOMEDCT_US|discharge from nose|Congestion or runny nose
-C1260880|T184|267101005|SNOMEDCT_US|nose dripping|Congestion or runny nose
-C1260880|T184|267101005|SNOMEDCT_US|nose running|Congestion or runny nose
-C1260880|T184|267101005|SNOMEDCT_US|running nose|Congestion or runny nose
-C1260880|T184|HP:0031417|HPO|runny nose|Congestion or runny nose
-C0027424|T184|R09.81|ICD10CM|R09.81|Congestion or runny nose
+C0027424|T184|nasal congestion|Congestion or runny nose
+C0027424|T184|stuffed-up nose|Congestion or runny nose
+C0027424|T184|stuffy nose|Congestion or runny nose
+C0027424|T184|congested nose|Congestion or runny nose
+C1260880|T184|rhinorrhea|Congestion or runny nose
+C1260880|T184|Nasal discharge|Congestion or runny nose
+C1260880|T184|discharge from nose|Congestion or runny nose
+C1260880|T184|nose dripping|Congestion or runny nose
+C1260880|T184|nose running|Congestion or runny nose
+C1260880|T184|running nose|Congestion or runny nose
+C1260880|T184|runny nose|Congestion or runny nose
+C0027424|T184|R09.81|Congestion or runny nose

## Diarrhea
-C0011991|T184|62315008|SNOMEDCT_US|diarrhea|Diarrhea
-C0011991|T184|R19.7|ICD10CM|R19.7|Diarrhea
-C0011991|T184|HP:0002014|HPO|Watery stool|Diarrhea
-C0011991|T184|HP:0002014|HPO|Watery stools|Diarrhea
+C0011991|T184|diarrhea|Diarrhea
+C0011991|T184|R19.7|Diarrhea
+C0011991|T184|Watery stool|Diarrhea
+C0011991|T184|Watery stools|Diarrhea
```

-Chart review mode will only label phrases whose CUI appears in this symptom file.
+Upload mode will only label phrases whose CUI appears in this symptom file.
And the label used will be the last part of each line (the `PREF` part).
That is, with the above symptoms file, the word `headache` would not be labelled at all
@@ -239,17 +239,17 @@
Simply pass `--no-nlp` or `--no-philter` and those steps will be skipped.

### Overwriting

-By default, chart review mode will never overwrite any data in Label Studio.
+By default, upload mode will never overwrite any data in Label Studio.
It will push new notes and skip any that were already uploaded to Label Studio.

But obviously, that becomes annoying if you are iterating on a dictionary or
-otherwise re-running chart review mode.
+otherwise re-running upload mode.
So to overwrite existing notes, simply pass `--overwrite`.

### Label Config

-Before using chart review mode, you should have already set up your Label Studio instance.
+Before using upload mode, you should have already set up your Label Studio instance.
Read [their docs](https://labelstud.io/guide/) to get started with that.
Those docs can guide you through how to define your labels.
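One detail worth knowing when defining labels: the label names upload mode attaches come from the last field (`PREF`) of each line in the symptoms file. A tiny standalone script like the sketch below (not Cumulus ETL code; the path is the example from earlier, and it assumes comment lines start with `#` as in the sample dictionary) lists the distinct labels a given dictionary would produce, which is handy when writing a matching Label Studio config:

```python
# Sketch: list the distinct PREF labels defined by a symptoms BSV file.
# Illustrative only, not Cumulus ETL code. Per the docs above, the label is the
# last |-separated field on each non-comment line.
import pathlib

bsv_path = pathlib.Path("/in/my-symptoms.bsv")  # example path from the docs above

labels = set()
for line in bsv_path.read_text().splitlines():
    line = line.strip()
    if not line or line.startswith("#"):  # skip blank lines and ## comments
        continue
    labels.add(line.split("|")[-1])  # PREF is the last field

print(sorted(labels))  # e.g. ['Congestion or runny nose', 'Diarrhea']
```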
@@ -264,7 +264,7 @@ But just briefly, a setup like this with hard-coded labels will work: ``` -Or you can use dynamic labels, and chart review mode will define them from your symptoms file. +Or you can use dynamic labels, and upload mode will define them from your symptoms file. Note that the `value` argument must match the `name` argument in your config, like so: ``` diff --git a/tests/test_cli.py b/tests/test_cli.py index de87730d..90c9b08f 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -18,8 +18,10 @@ class TestCumulusCLI(AsyncTestCase): @ddt.data( ([], "usage: cumulus-etl [OPTION]..."), + (["chart-review"], "usage: cumulus-etl upload-notes [OPTION]..."), + (["convert"], "usage: cumulus-etl convert [OPTION]..."), (["etl"], "usage: cumulus-etl etl [OPTION]..."), - (["chart-review"], "usage: cumulus-etl chart-review [OPTION]..."), + (["upload-notes"], "usage: cumulus-etl upload-notes [OPTION]..."), ) @ddt.unpack async def test_usage(self, argv, expected_usage): @@ -33,9 +35,10 @@ async def test_usage(self, argv, expected_usage): @ddt.data( ([], "cumulus_etl.etl.run_etl"), - (["etl"], "cumulus_etl.etl.run_etl"), - (["chart-review"], "cumulus_etl.chart_review.run_chart_review"), + (["chart-review"], "cumulus_etl.upload_notes.run_upload_notes"), (["convert"], "cumulus_etl.etl.convert.run_convert"), + (["etl"], "cumulus_etl.etl.run_etl"), + (["upload-notes"], "cumulus_etl.upload_notes.run_upload_notes"), ) @ddt.unpack async def test_routing(self, argv, main_method): diff --git a/tests/chart_review/__init__.py b/tests/upload_notes/__init__.py similarity index 100% rename from tests/chart_review/__init__.py rename to tests/upload_notes/__init__.py diff --git a/tests/chart_review/test_chart_cli.py b/tests/upload_notes/test_upload_cli.py similarity index 91% rename from tests/chart_review/test_chart_cli.py rename to tests/upload_notes/test_upload_cli.py index 38b5bd43..365c2258 100644 --- a/tests/chart_review/test_chart_cli.py +++ b/tests/upload_notes/test_upload_cli.py @@ -1,4 +1,4 @@ -"""Tests for chart_review/cli.py""" +"""Tests for upload_notes/cli.py""" import base64 import itertools @@ -12,7 +12,7 @@ import respx from cumulus_etl import cli, common, errors -from cumulus_etl.chart_review.labelstudio import LabelStudioNote +from cumulus_etl.upload_notes.labelstudio import LabelStudioNote from tests.ctakesmock import CtakesMixin from tests.utils import AsyncTestCase @@ -28,8 +28,8 @@ @ddt.ddt -class TestChartReview(CtakesMixin, AsyncTestCase): - """Tests for high-level chart review support.""" +class TestUploadNotes(CtakesMixin, AsyncTestCase): + """Tests for high-level upload-notes support.""" def setUp(self): super().setUp() @@ -48,7 +48,7 @@ def setUp(self): self.token_path = os.path.join(tmpdir, "ls-token.txt") common.write_text(self.token_path, "abc123") - self.ls_client_mock = self.patch("cumulus_etl.chart_review.cli.LabelStudioClient") + self.ls_client_mock = self.patch("cumulus_etl.upload_notes.cli.LabelStudioClient") self.ls_client = self.ls_client_mock.return_value # Write some initial cached patient mappings, so we can reverse-engineer them @@ -70,7 +70,7 @@ def setUp(self): }, ) - async def run_chart_review( + async def run_upload_notes( self, input_path=None, phi_path=None, @@ -81,7 +81,7 @@ async def run_chart_review( overwrite=False, ) -> None: args = [ - "chart-review", + "upload-notes", input_path or self.input_path, "https://localhost/labelstudio", phi_path or self.phi_path, @@ -131,7 +131,7 @@ def mock_search_url(respx_mock: respx.MockRouter, patient: str, 
doc_ids: Iterabl "resourceType": "Bundle", "entry": [ { - "resource": TestChartReview.make_docref(doc_id), + "resource": TestUploadNotes.make_docref(doc_id), } for doc_id in doc_ids ], @@ -141,7 +141,7 @@ def mock_search_url(respx_mock: respx.MockRouter, patient: str, doc_ids: Iterabl @staticmethod def mock_read_url(respx_mock: respx.MockRouter, doc_id: str, code: int = 200, **kwargs) -> None: - docref = TestChartReview.make_docref(doc_id, **kwargs) + docref = TestUploadNotes.make_docref(doc_id, **kwargs) respx_mock.get(f"https://localhost/DocumentReference/{doc_id}").respond(status_code=code, json=docref) @staticmethod @@ -181,7 +181,7 @@ def wrap_note(title: str, text: str, first: bool = True) -> str: async def test_real_and_fake_docrefs_conflict(self): """Verify that you can't pass in both real and fake docrefs""" with self.assertRaises(SystemExit) as cm: - await self.run_chart_review(anon_docrefs="foo", docrefs="bar") + await self.run_upload_notes(anon_docrefs="foo", docrefs="bar") self.assertEqual(errors.ARGS_CONFLICT, cm.exception.code) @respx.mock(assert_all_mocked=False) @@ -200,7 +200,7 @@ async def test_gather_anon_docrefs_from_server(self, respx_mock): ("unknown-doc", "unknown-patient"), # gracefully ignored ], ) - await self.run_chart_review(input_path="https://localhost", anon_docrefs=file.name) + await self.run_upload_notes(input_path="https://localhost", anon_docrefs=file.name) self.assertEqual({"D1", "D2", "D3"}, self.get_exported_ids()) self.assertEqual({"D1", "D2", "D3"}, self.get_pushed_ids()) @@ -215,12 +215,12 @@ async def test_gather_real_docrefs_from_server(self, respx_mock): with tempfile.NamedTemporaryFile() as file: self.write_real_docrefs(file.name, ["D1", "D2", "D3", "unknown-doc"]) - await self.run_chart_review(input_path="https://localhost", docrefs=file.name) + await self.run_upload_notes(input_path="https://localhost", docrefs=file.name) self.assertEqual({"D1", "D2", "D3"}, self.get_exported_ids()) self.assertEqual({"D1", "D2", "D3"}, self.get_pushed_ids()) - @mock.patch("cumulus_etl.chart_review.downloader.loaders.FhirNdjsonLoader") + @mock.patch("cumulus_etl.upload_notes.downloader.loaders.FhirNdjsonLoader") async def test_gather_all_docrefs_from_server(self, mock_loader): # Mock out the bulk export loading, as that's well tested elsewhere async def load_all(*args): @@ -230,8 +230,8 @@ async def load_all(*args): load_all_mock = mock_loader.return_value.load_all load_all_mock.side_effect = load_all - # Do the actual chart review push - await self.run_chart_review(input_path="https://localhost") + # Do the actual upload-notes push + await self.run_upload_notes(input_path="https://localhost") # Make sure we drive the bulk export correctly self.assertEqual(1, mock_loader.call_count) @@ -251,7 +251,7 @@ async def test_gather_anon_docrefs_from_folder(self): ("unknown-doc", "unknown-patient"), # gracefully ignored ], ) - await self.run_chart_review(anon_docrefs=file.name) + await self.run_upload_notes(anon_docrefs=file.name) self.assertEqual({"43"}, self.get_exported_ids()) self.assertEqual({"43"}, self.get_pushed_ids()) @@ -259,18 +259,18 @@ async def test_gather_anon_docrefs_from_folder(self): async def test_gather_real_docrefs_from_folder(self): with tempfile.NamedTemporaryFile() as file: self.write_real_docrefs(file.name, ["44", "unknown-doc"]) - await self.run_chart_review(docrefs=file.name) + await self.run_upload_notes(docrefs=file.name) self.assertEqual({"44"}, self.get_exported_ids()) self.assertEqual({"44"}, self.get_pushed_ids()) async def 
test_gather_all_docrefs_from_folder(self): - await self.run_chart_review() + await self.run_upload_notes() self.assertEqual({"43", "44"}, self.get_exported_ids()) self.assertEqual({"43", "44"}, self.get_pushed_ids()) async def test_successful_push_to_label_studio(self): - await self.run_chart_review() + await self.run_upload_notes() # Confirm we passed LS args down to the Label Studio client self.assertEqual( @@ -320,11 +320,11 @@ async def test_successful_push_to_label_studio(self): @ddt.data(True, False) async def test_overwrite(self, overwrite): """Verify we pass down --overwrite correctly""" - await self.run_chart_review(overwrite=overwrite) + await self.run_upload_notes(overwrite=overwrite) self.assertEqual(overwrite, self.ls_client.push_tasks.call_args[1]["overwrite"]) async def test_disabled_nlp(self): - await self.run_chart_review(nlp=False) + await self.run_upload_notes(nlp=False) tasks = self.ls_client.push_tasks.call_args[0][0] self.assertGreater(len(tasks), 0) @@ -334,8 +334,8 @@ async def test_disabled_nlp(self): @ddt.data(True, False) async def test_philter(self, run_philter): notes = [LabelStudioNote("EncID", "EncAnon", title="My Title", text="John Smith called on 10/13/2010")] - with mock.patch("cumulus_etl.chart_review.cli.read_notes_from_ndjson", return_value=notes): - await self.run_chart_review(philter=run_philter) + with mock.patch("cumulus_etl.upload_notes.cli.read_notes_from_ndjson", return_value=notes): + await self.run_upload_notes(philter=run_philter) tasks = self.ls_client.push_tasks.call_args[0][0] self.assertEqual(1, len(tasks)) @@ -359,7 +359,7 @@ async def test_combined_encounter_offsets(self, respx_mock): with tempfile.NamedTemporaryFile() as file: self.write_real_docrefs(file.name, ["D1", "D2"]) - await self.run_chart_review(input_path="https://localhost", docrefs=file.name) + await self.run_upload_notes(input_path="https://localhost", docrefs=file.name) notes = self.ls_client.push_tasks.call_args[0][0] self.assertEqual(1, len(notes)) diff --git a/tests/chart_review/test_chart_labelstudio.py b/tests/upload_notes/test_upload_labelstudio.py similarity index 96% rename from tests/chart_review/test_chart_labelstudio.py rename to tests/upload_notes/test_upload_labelstudio.py index a7eee71b..c1cf1b8e 100644 --- a/tests/chart_review/test_chart_labelstudio.py +++ b/tests/upload_notes/test_upload_labelstudio.py @@ -1,24 +1,24 @@ -"""Tests for cumulus.chart_review.labelstudio.py""" +"""Tests for cumulus.upload_notes.labelstudio.py""" from unittest import mock import ddt from ctakesclient.typesystem import Polarity -from cumulus_etl.chart_review.labelstudio import LabelStudioClient, LabelStudioNote +from cumulus_etl.upload_notes.labelstudio import LabelStudioClient, LabelStudioNote from tests import ctakesmock from tests.utils import AsyncTestCase @ddt.ddt -class TestChartLabelStudio(AsyncTestCase): +class TestUploadLabelStudio(AsyncTestCase): """Test case for label studio support""" def setUp(self): super().setUp() - self.ls_mock = self.patch("cumulus_etl.chart_review.labelstudio.label_studio_sdk.Client") + self.ls_mock = self.patch("cumulus_etl.upload_notes.labelstudio.label_studio_sdk.Client") self.ls_client = self.ls_mock.return_value self.ls_project = self.ls_client.get_project.return_value self.ls_project.get_tasks.return_value = []
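Finally, for reviewers skimming the cli.py hunk above, here is a condensed, self-contained sketch of the alias routing this patch introduces. It is not the project's actual cli.py (argument parsing is omitted and the real handlers are stubbed with strings); it only shows how the deprecated `chart-review` name and the new `upload-notes` name resolve to the same handler:

```python
# Condensed sketch of the subcommand aliasing shown in the cli.py hunk above.
# Not the project's actual cli.py; handlers are stubbed with strings.
import enum


class Command(enum.Enum):
    CHART_REVIEW = "chart-review"  # deprecated alias, kept for compatibility
    CONVERT = "convert"
    ETL = "etl"
    UPLOAD_NOTES = "upload-notes"


def pick_handler(subcommand: str) -> str:
    # Both the old and new names route to the same upload-notes entry point,
    # mirroring the `if subcommand in {...}` check in the patch.
    if subcommand in {Command.CHART_REVIEW.value, Command.UPLOAD_NOTES.value}:
        return "upload_notes.run_upload_notes"
    if subcommand == Command.CONVERT.value:
        return "convert.run_convert"
    return "etl.run_etl"


if __name__ == "__main__":
    for name in ("chart-review", "upload-notes", "convert", "etl"):
        print(f"{name} -> {pick_handler(name)}")
```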