diff --git a/cumulus_etl/deid/philter.py b/cumulus_etl/deid/philter.py index 1dd7acc..92304a0 100644 --- a/cumulus_etl/deid/philter.py +++ b/cumulus_etl/deid/philter.py @@ -16,7 +16,7 @@ class Philter: def __init__(self): # Ensure all the nltk data that our filter_config (below) needs is available. # In docker deployments, these should already be shipped with our docker image. - nltk.download("averaged_perceptron_tagger", quiet=True) + nltk.download("averaged_perceptron_tagger_eng", quiet=True) # philter-lite does not seem to have any easy way to reference this default config...? filter_config = os.path.join(os.path.dirname(__file__), "philter-config.toml") diff --git a/pyproject.toml b/pyproject.toml index 5ed98e8..4a303aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ dependencies = [ "inscriptis < 3", "jwcrypto < 2", "label-studio-sdk < 2", + "nltk >= 3.9, < 4", "openai < 2", "oracledb < 3", "philter-lite < 1",