diff --git a/CHANGELOG.md b/CHANGELOG.md
index ca26bd0f..0f5288de 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Changelog
 
+## 6.20.11 - Nov 22, 2024
+
+* Use transformers to trim incoming strings at import that are too long for DB columns:
+  * Bill: document note, version note
+  * Event: media note
+
 ## 6.20.10 - Nov 7, 2024
 
 * Add additional log info re: archiving scrape files to cloud storage
diff --git a/openstates/importers/base.py b/openstates/importers/base.py
index 7ab80fb7..1198b0c5 100644
--- a/openstates/importers/base.py
+++ b/openstates/importers/base.py
@@ -532,16 +532,20 @@ def apply_transformers(
         if transformers is None:
             transformers = self.cached_transformers
 
-        for key, key_transformers in transformers.items():
-            if key not in data:
-                continue
-            if isinstance(key_transformers, list):
-                for transformer in key_transformers:
-                    data[key] = transformer(data[key])
-            elif isinstance(key_transformers, dict):
-                self.apply_transformers(data[key], key_transformers)
-            else:
-                data[key] = key_transformers(data[key])
+        if isinstance(data, list):
+            for data_item in data:
+                self.apply_transformers(data_item, transformers)
+        else:
+            for key, key_transformers in transformers.items():
+                if key not in data:
+                    continue
+                if isinstance(key_transformers, list):
+                    for transformer in key_transformers:
+                        data[key] = transformer(data[key])
+                elif isinstance(key_transformers, dict):
+                    self.apply_transformers(data[key], key_transformers)
+                else:
+                    data[key] = key_transformers(data[key])
 
         return data
 
diff --git a/openstates/settings.py b/openstates/settings.py
index e1763ab0..b1551bbd 100644
--- a/openstates/settings.py
+++ b/openstates/settings.py
@@ -24,7 +24,16 @@
 CACHE_DIR = os.path.join(os.getcwd(), "_cache")
 SCRAPED_DATA_DIR = os.path.join(os.getcwd(), "_data")
 
-IMPORT_TRANSFORMERS = {"bill": {"identifier": transformers.fix_bill_id}}
+IMPORT_TRANSFORMERS = {
+    "bill": {
+        "identifier": transformers.fix_bill_id,
+        "documents": {"note": transformers.truncate_300},  # TODO remove when db migration done
+        "versions": {"note": transformers.truncate_300},  # TODO remove when db migration done
+    },
+    "event": {
+        "media": {"note": transformers.truncate_300},  # TODO remove when db migration done
+    }
+}
 
 # Django settings
 LOGGING = {
diff --git a/openstates/utils/transformers.py b/openstates/utils/transformers.py
index cab4ef1b..d82cd3f8 100644
--- a/openstates/utils/transformers.py
+++ b/openstates/utils/transformers.py
@@ -19,3 +19,7 @@ def fix_bill_id(bill_id: str) -> str:
 
 def collapse_whitespace(value: str) -> str:
     return _whitespace_re.sub(" ", value)
+
+
+def truncate_300(value: str) -> str:
+    return value[:300]
diff --git a/pyproject.toml b/pyproject.toml
index 821f34fc..7f5571f4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "openstates"
-version = "6.20.10"
+version = "6.20.11"
 description = "core infrastructure for the openstates project"
 authors = ["James Turk "]
 license = "MIT"