Skip to content

Commit

Permalink
Avoid DB import bugs by trimming strings at import transform
Browse files Browse the repository at this point in the history
  • Loading branch information
jessemortenson committed Nov 22, 2024
1 parent 41e214d commit 0b6f1ba
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 11 deletions.
24 changes: 14 additions & 10 deletions openstates/importers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,16 +532,20 @@ def apply_transformers(
if transformers is None:
transformers = self.cached_transformers

for key, key_transformers in transformers.items():
if key not in data:
continue
if isinstance(key_transformers, list):
for transformer in key_transformers:
data[key] = transformer(data[key])
elif isinstance(key_transformers, dict):
self.apply_transformers(data[key], key_transformers)
else:
data[key] = key_transformers(data[key])
if isinstance(data, list):
for data_item in data:
self.apply_transformers(data_item, transformers)
else:
for key, key_transformers in transformers.items():
if key not in data:
continue
if isinstance(key_transformers, list):
for transformer in key_transformers:
data[key] = transformer(data[key])
elif isinstance(key_transformers, dict):
self.apply_transformers(data[key], key_transformers)
else:
data[key] = key_transformers(data[key])

return data

Expand Down
11 changes: 10 additions & 1 deletion openstates/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,16 @@
# Working directories, both resolved against the process's current working
# directory at the time this settings module is evaluated.
CACHE_DIR = os.path.join(os.getcwd(), "_cache")  # "<cwd>/_cache"
SCRAPED_DATA_DIR = os.path.join(os.getcwd(), "_data")  # "<cwd>/_data"

IMPORT_TRANSFORMERS = {"bill": {"identifier": transformers.fix_bill_id}}
# Transformer configuration: the outer keys ("bill", "event") each map to a
# dict of field name -> transformer. A transformer value is either a callable
# (e.g. transformers.fix_bill_id) or a nested dict of further
# field name -> transformer mappings for a sub-structure.
# NOTE(review): presumably the outer keys are scraped object types and the
# nested dicts ("documents", "versions", "media") are applied to each item of
# a list-valued field by the importer -- confirm against the importer code.
IMPORT_TRANSFORMERS = {
    "bill": {
        "identifier": transformers.fix_bill_id,
        "documents": {"note": transformers.truncate_300},  # TODO remove when db migration done
        "versions": {"note": transformers.truncate_300},  # TODO remove when db migration done
    },
    "event": {
        "media": {"note": transformers.truncate_300},  # TODO remove when db migration done
    }
}

# Django settings
LOGGING = {
Expand Down
4 changes: 4 additions & 0 deletions openstates/utils/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,7 @@ def fix_bill_id(bill_id: str) -> str:

def collapse_whitespace(value: str) -> str:
    """Return *value* with each match of ``_whitespace_re`` replaced by one space.

    The pattern itself is the module-level ``_whitespace_re`` compiled
    elsewhere in this module; this function only performs the substitution.
    """
    collapsed = _whitespace_re.sub(" ", value)
    return collapsed


def truncate_300(value: str) -> str:
    """Return at most the first 300 characters of *value*.

    Strings of 300 characters or fewer are returned unchanged; longer
    strings are cut off after the 300th character.
    """
    max_length = 300
    shortened = value[:max_length]
    return shortened

0 comments on commit 0b6f1ba

Please sign in to comment.