From 47c87cf53ae5f8c6bfbf7949b6daf80b4ce171af Mon Sep 17 00:00:00 2001 From: Christophe Benz Date: Tue, 11 Jun 2024 14:22:32 +0200 Subject: [PATCH] Strip XML element text --- sdmx/reader/xml/v21.py | 13 ++++++++----- sdmx/reader/xml/v30.py | 8 ++++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index 3e0b2b90..1f7a9ad3 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -69,7 +69,7 @@ def info_from_element(cls, elem): for k in ("class", "package"): result[k] = elem.attrib.get(k, None) elif elem.tag == "URN": - result = sdmx.urn.match(elem.text) + result = sdmx.urn.match(elem.text.strip()) # If the URN doesn't specify an item ID, it is probably a reference to a # MaintainableArtefact, so target_id and id are the same result.update(target_id=result["item_id"] or result["id"]) @@ -355,7 +355,7 @@ def _st(reader, elem): @end("mes:Extracted mes:Prepared mes:ReportingBegin mes:ReportingEnd") def _datetime(reader, elem): - text, n = re.subn(r"(.*\.)(\d{6})\d+(\+.*)", r"\1\2\3", elem.text) + text, n = re.subn(r"(.*\.)(\d{6})\d+(\+.*)", r"\1\2\3", elem.text.strip()) if n > 0: log.debug(f"Truncate sub-microsecond time in <{QName(elem).localname}>") @@ -371,7 +371,10 @@ def _datetime(reader, elem): def _localization(reader, elem): reader.push( elem, - (elem.attrib.get(reader.qname("xml:lang"), model.DEFAULT_LOCALE), elem.text), + ( + elem.attrib.get(reader.qname("xml:lang"), model.DEFAULT_LOCALE), + elem.text.strip(), + ), ) @@ -753,7 +756,7 @@ def _key0(reader, elem): "ToVtlSubSpace": model.ToVTLSpaceKey, }[parent] - return cls(key=elem.text) + return cls(key=elem.text.strip()) @end("str:DataKeySet") @@ -769,7 +772,7 @@ def _p(reader, elem): reader.push( elem, model.Period( - is_inclusive=elem.attrib["isInclusive"], period=isoparse(elem.text) + is_inclusive=elem.attrib["isInclusive"], period=isoparse(elem.text.strip()) ), ) diff --git a/sdmx/reader/xml/v30.py b/sdmx/reader/xml/v30.py index 29a043fd..b3859d20 100644 --- a/sdmx/reader/xml/v30.py +++ b/sdmx/reader/xml/v30.py @@ -17,7 +17,7 @@ class Reference(BaseReference): @classmethod def info_from_element(cls, elem): try: - result = sdmx.urn.match(elem.text) + result = sdmx.urn.match(elem.text.strip()) # If the URN doesn't specify an item ID, it is probably a reference to a # MaintainableArtefact, so target_id and id are the same result.update(target_id=result["item_id"] or result["id"]) @@ -84,13 +84,13 @@ class Reader(XMLEventReader): @end("str:Codelist") def _cl(reader, elem): try: - sdmx.urn.match(elem.text) + sdmx.urn.match(elem.text.strip()) except ValueError: result = v21._itemscheme(reader, elem) result.extends = reader.pop_all(model.CodelistExtension) return result else: - reader.push(elem, elem.text) + reader.push(elem, elem.text.strip()) @end("str:CodelistExtension") @@ -111,7 +111,7 @@ def _code_selection(reader, elem): @end("str:MemberValue") def _mv(reader, elem): - return reader.model.MemberValue(value=elem.text) + return reader.model.MemberValue(value=elem.text.strip()) @end("str:GeoGridCodelist")