Skip to content

Commit

Permalink
Merge pull request #293 from smart-on-fhir/mikix/connect-warnings
Browse files Browse the repository at this point in the history
feat: downgrade some fatal connection errors to warnings
  • Loading branch information
mikix authored Jan 10, 2024
2 parents 7405ff8 + 7a32259 commit d8112ec
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 18 deletions.
20 changes: 19 additions & 1 deletion cumulus_etl/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
TASK_SET_EMPTY = 21
ARGS_CONFLICT = 22
ARGS_INVALID = 23
FHIR_URL_MISSING = 24
# FHIR_URL_MISSING = 24 # Obsolete, it's no longer fatal
BASIC_CREDENTIALS_MISSING = 25
FOLDER_NOT_EMPTY = 26
BULK_EXPORT_FOLDER_NOT_LOCAL = 27
Expand All @@ -32,6 +32,24 @@
SERVICE_MISSING = 33 # generic init-check service is missing


class FhirConnectionError(Exception):
"""We needed to connect to a FHIR server but failed"""


class FhirUrlMissing(FhirConnectionError):
"""We needed to connect to a FHIR server but no URL was provided"""

def __init__(self):
super().__init__("Could not download some files without a FHIR server URL (use --fhir-url)")


class FhirAuthMissing(FhirConnectionError):
"""We needed to connect to a FHIR server but no authentication config was provided"""

def __init__(self):
super().__init__("Could not download some files without authentication parameters (see --help)")


class FatalError(Exception):
"""An unrecoverable error"""

Expand Down
14 changes: 13 additions & 1 deletion cumulus_etl/etl/tasks/nlp_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
import copy
import logging
import os
import sys
from typing import Callable

import rich.progress

from cumulus_etl import common, fhir, nlp, store
from cumulus_etl import common, errors, fhir, nlp, store
from cumulus_etl.etl.tasks.base import EtlTask, OutputTable


Expand Down Expand Up @@ -63,6 +64,8 @@ async def read_notes(
:returns: a tuple of original-docref, scrubbed-docref, and clinical note
"""
warned_connection_error = False

for docref in self.read_ndjson(progress=progress):
orig_docref = copy.deepcopy(docref)
can_process = (
Expand All @@ -75,6 +78,15 @@ async def read_notes(

try:
clinical_note = await fhir.get_docref_note(self.task_config.client, docref)
except errors.FhirConnectionError as exc:
if not warned_connection_error:
# Only warn user about a misconfiguration once per task.
# It's not fatal because it might be intentional (partially inlined DocRefs
# and the other DocRefs are known failures - BCH hits this with Cerner data).
print(exc, file=sys.stderr)
warned_connection_error = True
self.add_error(orig_docref)
continue
except Exception as exc: # pylint: disable=broad-except
logging.warning("Error getting text for docref %s: %s", docref["id"], exc)
self.add_error(orig_docref)
Expand Down
11 changes: 3 additions & 8 deletions cumulus_etl/fhir/fhir_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ def urljoin(base: str, path: str) -> str:
return path

if not base:
print("You must provide a base FHIR server URL with --fhir-url", file=sys.stderr)
raise SystemExit(errors.FHIR_URL_MISSING)
raise errors.FhirUrlMissing()
return urllib.parse.urljoin(base, path)


Expand All @@ -34,12 +33,8 @@ async def authorize(self, session: httpx.AsyncClient, reauthorize=False) -> None
del session

if reauthorize:
# Abort because we clearly need authentication tokens, but have not been given any parameters for them.
print(
"You must provide some authentication parameters (like --smart-client-id) to connect to a server.",
file=sys.stderr,
)
raise SystemExit(errors.SMART_CREDENTIALS_MISSING)
# We clearly need auth tokens, but have not been given any parameters for them.
raise errors.FhirAuthMissing()

def sign_headers(self, headers: dict) -> dict:
"""Add signature token to request headers"""
Expand Down
13 changes: 8 additions & 5 deletions cumulus_etl/loaders/fhir/bulk_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,14 @@ async def export(self) -> None:
# But some servers do support it, and it is a possible future addition to the spec.
params["_until"] = self._until

response = await self._request_with_delay(
urllib.parse.urljoin(self._url, f"$export?{urllib.parse.urlencode(params)}"),
headers={"Prefer": "respond-async"},
target_status_code=202,
)
try:
response = await self._request_with_delay(
urllib.parse.urljoin(self._url, f"$export?{urllib.parse.urlencode(params)}"),
headers={"Prefer": "respond-async"},
target_status_code=202,
)
except errors.FhirConnectionError as exc:
errors.fatal(str(exc), errors.FHIR_AUTH_FAILED)

# Grab the poll location URL for status updates
poll_location = response.headers["Content-Location"]
Expand Down
47 changes: 47 additions & 0 deletions tests/etl/test_etl_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
import json
import os
import shutil
import tempfile
from unittest import mock

import ddt
import respx
from ctakesclient.typesystem import Polarity

from cumulus_etl import common, errors, loaders, store
Expand Down Expand Up @@ -159,6 +161,17 @@ async def test_errors_to_passed_to_tasks(self):
await self.run_etl(errors_to=f"{self.output_path}/errors")
self.assertEqual(mock_etl_job.call_args[0][0].dir_errors, f"{self.output_path}/errors")

@respx.mock
async def test_bulk_no_auth(self):
"""Verify that if no auth is provided, we'll error out well."""
respx.get("https://localhost:12345/metadata").respond(401)
respx.get("https://localhost:12345/$export?_type=Patient").respond(401)

# Now run the ETL on that new input dir without any server auth config provided
with self.assertRaises(SystemExit) as cm:
await self.run_etl(input_path="https://localhost:12345/", tasks=["patient"])
self.assertEqual(errors.FHIR_AUTH_FAILED, cm.exception.code)


class TestEtlJobConfig(BaseEtlSimple):
"""Test case for the job config logging data"""
Expand Down Expand Up @@ -369,6 +382,40 @@ async def test_does_not_hit_server_if_cache_exists(self):
{("68235000", "C0027424")}, {(x["code"], x["cui"]) for x in symptom["match"]["conceptAttributes"]}
)

@respx.mock
async def test_gracefully_allows_no_server_config(self):
"""
Verify that if no server is provided, we'll just continue as best we can.
This is useful in cases where some notes have been inlined, but not everything could be.
"""
# Make new input dir with some docrefs that only have a URL, no data.
with tempfile.TemporaryDirectory() as tmpdir:
with common.NdjsonWriter(f"{tmpdir}/DocumentReference.ndjson") as writer:
src = store.Root(self.input_path)
docref_iter = common.read_resource_ndjson(src, "DocumentReference")

# Replace first row's data with a partial URL
first = next(docref_iter)
del first["content"][0]["attachment"]["data"]
first["content"][0]["attachment"]["url"] = "Binary/blarg"
writer.write(first)

# And second row with an absolute (forbidden) URL
second = next(docref_iter)
del second["content"][0]["attachment"]["data"]
second["content"][0]["attachment"]["url"] = "https://localhost:12345/Binary/blarg"
writer.write(second)

respx.get("https://localhost:12345/Binary/blarg").respond(401)

# Now run the ETL on that new input dir without any server auth config provided
with self.assertRaises(SystemExit) as cm:
await self.run_etl(tasks=["covid_symptom__nlp_results"], input_path=tmpdir)

# Should exit because the task will mark itself as failed
self.assertEqual(errors.TASK_FAILED, cm.exception.code)

async def test_cnlp_rejects(self):
"""Verify that if the cnlp server negates a match, it does not show up"""
# First match is fever, second is nausea
Expand Down
8 changes: 5 additions & 3 deletions tests/fhir/test_fhir_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,12 @@ async def use_client(request=False, code=None, url=self.server_url, **kwargs):
await use_client()

# No SMART args at all will raise though if we do make a call
await use_client(code=errors.SMART_CREDENTIALS_MISSING, request=True)
with self.assertRaises(errors.FhirAuthMissing):
await use_client(request=True)

# No client ID
await use_client(code=errors.FHIR_URL_MISSING, request=True, url=None)
# No base URL
with self.assertRaises(errors.FhirUrlMissing):
await use_client(request=True, url=None)

# No JWKS
await use_client(code=errors.SMART_CREDENTIALS_MISSING, smart_client_id="foo")
Expand Down

0 comments on commit d8112ec

Please sign in to comment.