From d670c188ce79cb3d7c6e43d4f461b4ca2daa4f31 Mon Sep 17 00:00:00 2001 From: Matthew Lutze Date: Fri, 24 Nov 2023 05:26:08 +0100 Subject: [PATCH] :bug: Fix bug in `von`-part name parsing (#423) --- bibtexparser/middlewares/names.py | 14 ++++++++++---- tests/middleware_tests/test_names.py | 21 +++++++++++++++++++++ 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/bibtexparser/middlewares/names.py b/bibtexparser/middlewares/names.py index 8898eae..9b6d308 100644 --- a/bibtexparser/middlewares/names.py +++ b/bibtexparser/middlewares/names.py @@ -424,11 +424,17 @@ def parse_single_name_into_parts(name, strict=True): else: lcases = cases[0] - # + def rindex(l, x, default): + """Returns the index of the rightmost occurrence of x in l.""" + for i in range(len(l) - 1, -1, -1): + if l[i] == x: + return i + return default + + # Check if at least one of the words is lowercase if 0 in lcases: - split = len(lcases) - lcases[::-1].index(0) - if split == len(lcases): - split = 0 # Last cannot be empty. 
+ # Excluding the last word, find the index of the last lower word + split = rindex(lcases[:-1], 0, -1) + 1 parts.von = sections[0][:split] parts.last = sections[0][split:] diff --git a/tests/middleware_tests/test_names.py b/tests/middleware_tests/test_names.py index 4a420a3..fb5e6bc 100644 --- a/tests/middleware_tests/test_names.py +++ b/tests/middleware_tests/test_names.py @@ -825,6 +825,27 @@ def test_name_splitting_commas_at_higher_brace_level(strict: bool): r"Brand\~{a}o, F", {"first": ["F"], "von": [], "last": ["Brand\\", "{a}o"], "jr": []}, ), + ############################################################################### + # + # Group 2 examples from Tame the BeaST + # + ############################################################################### + ( + r"de la fontaine, Jean", + {"first": ["Jean"], "von": ["de", "la"], "last": ["fontaine"], "jr": []}, + ), + ( + r"De La Fontaine, Jean", + {"first": ["Jean"], "von": [], "last": ["De", "La", "Fontaine"], "jr": []}, + ), + ( + r"De la Fontaine, Jean", + {"first": ["Jean"], "von": ["De", "la"], "last": ["Fontaine"], "jr": []}, + ), + ( + r"de La Fontaine, Jean", + {"first": ["Jean"], "von": ["de"], "last": ["La", "Fontaine"], "jr": []}, + ), )