-
Notifications
You must be signed in to change notification settings - Fork 132
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
✨ Add NormalizeFieldKeys middleware (#473)
- Loading branch information
1 parent
53843c0
commit 09f8bc1
Showing
3 changed files
with
121 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import logging | ||
from typing import Dict | ||
from typing import List | ||
from typing import Set | ||
|
||
from bibtexparser.library import Library | ||
from bibtexparser.model import Entry | ||
from bibtexparser.model import Field | ||
|
||
from .middleware import BlockMiddleware | ||
|
||
|
||
class NormalizeFieldKeys(BlockMiddleware):
    """Lowercase the key of every field in each entry.

    When two or more fields of one entry collapse to the same lowercase key
    (for example ``author`` and ``Author``), a warning is logged and the field
    encountered last is kept. The surviving field sits at the position where
    that key first appeared in the entry.

    Some other middlewares, such as `SeparateCoAuthors`, assume lowercase key names.
    """

    def __init__(self, allow_inplace_modification: bool = True):
        super().__init__(
            allow_inplace_modification=allow_inplace_modification,
            allow_parallel_execution=True,
        )

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: "Library") -> Entry:
        # Maps lowercase key -> field. A plain dict keeps the slot of the first
        # insertion when a key is reassigned, which yields "first position,
        # last value" for colliding keys.
        by_lowered_key: Dict[str, Field] = {}

        for field in entry.fields:
            original_key = field.key
            lowered = original_key.lower()

            # Membership in the result dict is enough to detect a collision.
            if lowered in by_lowered_key:
                logging.warning(
                    f"NormalizeFieldKeys: in entry '{entry.key}': "
                    f"duplicate normalized key '{lowered}' "
                    f"(original '{original_key}'); overriding previous value"
                )

            # The field object itself is renamed and reused, not copied.
            field.key = lowered
            by_lowered_key[lowered] = field

        entry.fields = [*by_lowered_key.values()]
        return entry
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
import re | ||
|
||
from bibtexparser import Library | ||
from bibtexparser.middlewares.fieldkeys import NormalizeFieldKeys | ||
from bibtexparser.model import Entry | ||
from bibtexparser.model import Field | ||
|
||
# Fixture data: entry type -> {field key: raw field value}.
# All keys are already lowercase. Values are raw strings; some carry their own
# enclosing double quotes and some (e.g. the "month" of the last two entries)
# do not.
entries = {
    "article": {
        "author": '"Smith, J."',
        "title": '"A Test Article"',
        "journal": '"J. of Testing"',
        "month": '"jan"',
        "year": '"2022"',
    },
    "book": {
        "author": '"Doe, J."',
        "title": '"A Test Book"',
        "publisher": '"Test Pub."',
        "year": '"2021"',
        "month": "apr",
    },
    "inproceedings": {
        "author": '"Jones, R."',
        "title": '"A Test Conf. Paper"',
        "booktitle": '"Proc. of the Intl. Test Conf."',
        "year": '"2023"',
        "month": "8",
    },
}

# Reference library built straight from ``entries``; the tests compare the
# output of the middleware against it. Entry keys are "entry0", "entry1", ...
ref = Library()
for i, (entry_type, fields) in enumerate(entries.items()):
    f = [
        Field(key=field_key, value=field_value)
        for field_key, field_value in fields.items()
    ]
    ref.add(Entry(entry_type=entry_type, key=f"entry{i}", fields=f))
|
||
|
||
def test_normalize_fieldkeys():
    """
    Entries whose field keys are already lowercase must come out of the
    middleware equal to the reference library.
    """
    lib = Library()
    for index, (entry_type, raw_fields) in enumerate(entries.items()):
        lib.add(
            Entry(
                entry_type=entry_type,
                key=f"entry{index}",
                fields=[
                    Field(key=name, value=value)
                    for name, value in raw_fields.items()
                ],
            )
        )

    normalized = NormalizeFieldKeys().transform(lib)

    # Compare entry by entry against the module-level reference library.
    for entry_key, entry in normalized.entries_dict.items():
        assert entry == ref.entries_dict[entry_key]
|
||
|
||
def test_normalize_fieldkeys_force_last(caplog):
    """
    Each entry holds every field twice: first under its lowercase key with a
    placeholder value, then under its uppercase key with the real value.
    After normalization the later (real) value must survive and a warning
    mentioning NormalizeFieldKeys must have been logged.
    """
    lib = Library()
    for index, (entry_type, raw_fields) in enumerate(entries.items()):
        placeholders = [
            Field(key=name.lower(), value="dummyvalue") for name in raw_fields
        ]
        real_values = [
            Field(key=name.upper(), value=value)
            for name, value in raw_fields.items()
        ]
        lib.add(
            Entry(
                entry_type=entry_type,
                key=f"entry{index}",
                fields=placeholders + real_values,
            )
        )

    normalized = NormalizeFieldKeys().transform(lib)

    # The captured log is expected to start with a WARNING record of the form
    # "<logger>:<file>.py:<line> NormalizeFieldKeys...".
    warning_pattern = r"(WARNING\s*)(\w*\:\w*\.py\:[0-9]*\s*)(NormalizeFieldKeys)(.*)"
    assert re.match(warning_pattern, caplog.text)

    for entry_key, entry in normalized.entries_dict.items():
        assert entry == ref.entries_dict[entry_key]