Skip to content

Commit

Permalink
Merge pull request #20 from acdh-oeaw/17-upgrade-extract_begin_end-fu…
Browse files Browse the repository at this point in the history
…nction

feature: hardend `extract_begin_end` #17
  • Loading branch information
csae8092 authored Jan 27, 2023
2 parents 2390480 + 9a3c888 commit 94db068
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 25 deletions.
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ print(normalize_string(string))

### extract date attributes (begin, end)

expects typical TEI date attributes like `@when, @when-iso, @notBefore, @notAfter` and returns a tuple containing start and end date values. If only `@when or @when-iso` or only `@notBefore or @notAfter` are provided, the returned values are the same
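expects typical TEI date attributes like `@when, @when-iso, @notBefore, @notAfter, @from, @to, ...` and returns a tuple containing start and end date values. If only `@when` or `@when-iso` is provided, both returned values are that date. If only a start attribute (e.g. `@notBefore`) or only an end attribute (e.g. `@notAfter`) is provided, the missing value is set to the provided one, unless the parameter `fill_missing` (default `True`) is set to `False`, in which case the missing value is `None`.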

```python
from lxml.etree import Element
Expand All @@ -313,6 +313,14 @@ print(extract_begin_end(date_object))
# returns
# ('1900-12-12', '1900-12-12')

date_string = "1900-12-12"
date_object = Element("{http://www.tei-c.org/ns/1.0}tei")
date_object.attrib["notBefore"] = date_string
print(extract_begin_end(date_object, fill_missing=False))

# returns
# ('1900-12-12', None)

date_object = Element("{http://www.tei-c.org/ns/1.0}tei")
date_object.attrib["notAfter"] = "1900-12-12"
date_object.attrib["notBefore"] = "1800"
Expand Down
50 changes: 36 additions & 14 deletions acdh_cidoc_pyutils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from rdflib import Graph, Literal, URIRef, XSD, RDF, RDFS, OWL
from slugify import slugify
from acdh_tei_pyutils.utils import make_entity_label
from acdh_cidoc_pyutils.namespaces import CIDOC, NSMAP
from acdh_cidoc_pyutils.namespaces import CIDOC, NSMAP, DATE_ATTRIBUTE_DICT


def normalize_string(string: str) -> str:
Expand Down Expand Up @@ -45,19 +45,41 @@ def coordinates_to_p168(
return g


def extract_begin_end(date_object: Union[Element, dict]) -> tuple[str, str]:
    """Return a ``(begin, end)`` tuple read from TEI-style date attributes.

    A non-empty ``when-iso`` (checked first) or ``when`` value is returned
    for both positions. Otherwise ``notBefore`` and ``notAfter`` are used;
    if only one of the two is set, its value is used for both positions.
    When none of the attributes is set, ``("", "")`` is returned.

    :param date_object: anything offering ``.get(key, default)``, e.g. an
        element or a plain dict of attributes.
    :return: tuple of begin and end values.
    """
    # An exact date takes precedence over any range attributes.
    for exact_attribute in ("when-iso", "when"):
        if date_object.get(exact_attribute, "") != "":
            exact_value = date_object.get(exact_attribute)
            return (exact_value, exact_value)

    not_before = date_object.get("notBefore", "")
    not_after = date_object.get("notAfter", "")

    # Mirror the single known bound onto the missing one.
    if not_after == "" and not_before != "":
        not_after = not_before
    if not_before == "" and not_after != "":
        not_before = not_after
    return (not_before, not_after)
def extract_begin_end(
    date_object: Union[Element, dict],
    fill_missing: bool = True,
    attribute_map: dict = DATE_ATTRIBUTE_DICT,
) -> tuple[Union[str, None], Union[str, None]]:
    """Extract a ``(begin, end)`` tuple from TEI-style date attributes.

    Every key of ``attribute_map`` is looked up on ``date_object``; the
    mapped role (``"start"``, ``"end"`` or ``"when"``) decides how a
    non-empty value is used. If several attributes with the same role are
    present, the one listed last in ``attribute_map`` wins.

    Resolution rules:

    * an explicit start/end value is always kept;
    * a ``when`` value supplies whichever side has no explicit value, so a
      lone ``when`` yields ``(when, when)`` regardless of ``fill_missing``;
    * if a side is still empty and ``fill_missing`` is true, it is copied
      from the other side; otherwise it stays ``None``.

    :param date_object: anything offering ``.get(key)``, e.g. an element
        or a plain dict of attributes.
    :param fill_missing: copy the single known bound onto the missing one.
    :param attribute_map: mapping of attribute name to role.
    :return: tuple of begin and end; a position is ``None`` when no value
        could be determined for it.
    """
    start, end, when = None, None, None
    for key, role in attribute_map.items():
        date_value = date_object.get(key)
        if not date_value:
            # Missing and empty attributes are treated alike.
            continue
        if role == "start":
            start = date_value
        elif role == "end":
            end = date_value
        elif role == "when":
            when = date_value

    # `when` backs up a missing bound instead of being dropped when it
    # appears together with only one of start/end.
    begin = start or when
    finish = end or when
    if fill_missing:
        begin, finish = begin or finish, finish or begin
    return begin, finish


def date_to_literal(date_str: str) -> Literal:
Expand Down
13 changes: 13 additions & 0 deletions acdh_cidoc_pyutils/namespaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,16 @@
"tei": "http://www.tei-c.org/ns/1.0",
"xml": "http://www.w3.org/XML/1998/namespace",
}

# Maps date attribute names to the role their value plays: "start" (lower
# bound), "end" (upper bound) or "when" (a single date). Key order is kept
# identical to a hand-written literal: start names, end names, when names.
DATE_ATTRIBUTE_DICT = {
    attribute_name: role
    for role, attribute_names in (
        ("start", ("notBefore", "notBefore-iso", "from", "from-iso")),
        ("end", ("notAfter", "notAfter-iso", "to", "to-iso")),
        ("when", ("when", "when-iso")),
    )
    for attribute_name in attribute_names
}
44 changes: 34 additions & 10 deletions tests/test_cidoc_pyutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,37 +144,61 @@ def test_006_begin_end(self):
date_object = Element("hansi")
date_object.attrib["when-iso"] = date_string
begin, end = extract_begin_end(date_object)
self.assertTrue(begin, date_string)
self.assertTrue(end, date_string)
self.assertEqual(begin, date_string)
self.assertEqual(end, date_string)
begin, end = extract_begin_end(date_object, fill_missing=False)
self.assertEqual(begin, date_string)
self.assertEqual(end, date_string)

date_string = "1900-12-12"
date_object = Element("hansi")
date_object.attrib["from-iso"] = date_string
begin, end = extract_begin_end(date_object, fill_missing=False)
self.assertEqual(begin, date_string)
self.assertEqual(end, None)

date_string = "1900-12-12"
date_object = Element("hansi")
date_object.attrib["when"] = date_string
begin, end = extract_begin_end(date_object)
self.assertTrue(begin, date_string)
self.assertTrue(end, date_string)
self.assertEqual(begin, date_string)
self.assertEqual(end, date_string)

date_string = "1900-12-12"
date_object = Element("hansi")
date_object.attrib["notAfter"] = date_string
begin, end = extract_begin_end(date_object)
self.assertTrue(begin, date_string)
self.assertTrue(end, date_string)
self.assertEqual(begin, date_string)
self.assertEqual(end, date_string)

date_string = "1900-12-12"
date_object = Element("hansi")
date_object.attrib["notBefore"] = date_string
begin, end = extract_begin_end(date_object)
self.assertTrue(begin, date_string)
self.assertTrue(end, date_string)
self.assertEqual(begin, date_string)
self.assertEqual(end, date_string)

date_string = "1900-12-12"
date_object = Element("hansi")
date_object.attrib["notAfter"] = date_string
date_object.attrib["notBefore"] = "1800"
begin, end = extract_begin_end(date_object)
self.assertTrue(begin, "1800")
self.assertTrue(end, date_string)
self.assertEqual(begin, "1800")
self.assertEqual(end, date_string)

date_string = "1900-12-12"
date_object = Element("hansi")
date_object.attrib["notAfter"] = date_string
date_object.attrib["notBefore"] = "1800"
begin, end = extract_begin_end(date_object, fill_missing=False)
self.assertEqual(begin, "1800")
self.assertEqual(end, date_string)
date_string = "1900-12-12"
date_object = Element("hansi")
date_object.attrib["to"] = date_string
begin, end = extract_begin_end(date_object, fill_missing=False)
self.assertEqual(begin, None)
self.assertEqual(end, date_string)

def test_007_make_appelations(self):
g = Graph()
Expand Down

0 comments on commit 94db068

Please sign in to comment.