From 08fbaf7ca504b75f5a37c3feb67cc816f3915c9b Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Mon, 23 Sep 2024 08:30:07 +0000 Subject: [PATCH] Expanded validation options for pre 2015 --- CHANGELOG.md | 5 +++ pyproject.toml | 2 +- src/mysoc_validator/__init__.py | 2 +- src/mysoc_validator/models/transcripts.py | 42 +++++++++++++++++++---- tests/test_transcript.py | 9 +++++ 5 files changed, 52 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ba6b3a7..770e1aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [comment]: # (Template for updates) +## [0.3.2] - 2024-09-23 + +### Fixed +- Expanded transcript validation options for pre-2015 transcripts. + ## [0.3.1] - 2024-09-11 ### Fixed diff --git a/pyproject.toml b/pyproject.toml index 686834c..4d4b951 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "mysoc-validator" -version = "0.3.1" +version = "0.3.2" description = "Pydantic validators for mySociety democracy types" authors = ["mySociety "] readme = "README.md" diff --git a/src/mysoc_validator/__init__.py b/src/mysoc_validator/__init__.py index 145a052..14b81a1 100644 --- a/src/mysoc_validator/__init__.py +++ b/src/mysoc_validator/__init__.py @@ -6,6 +6,6 @@ from .models.popolo import Popolo from .models.transcripts import Transcript -__version__ = "0.3.1" +__version__ = "0.3.2" __all__ = ["Popolo", "Transcript", "Register", "__version__"] diff --git a/src/mysoc_validator/models/transcripts.py b/src/mysoc_validator/models/transcripts.py index df2961c..1d78c56 100644 --- a/src/mysoc_validator/models/transcripts.py +++ b/src/mysoc_validator/models/transcripts.py @@ -41,7 +41,8 @@ agreement_gid_pattern = ( r"uk\.org\.publicwhip\/[a-z]+\/\d{4}-\d{2}-\d{2}[a-z]?\.\d+\.\d+\.a\.\d+" ) -person_id_pattern = r"uk\.org\.publicwhip/person/\d+$" +person_id_pattern = r"(uk\.org\.publicwhip/person/\d+$|unknown$)" 
+member_id_pattern = r"(uk\.org\.publicwhip/member/\d+$|unknown$)" GIDPattern = Annotated[str, Field(pattern=gid_pattern)] @@ -113,11 +114,17 @@ class Speech(StrictBaseXMLModel, tags=["speech"]): type: str = "" nospeaker: Optional[str] = None speakername: Optional[str] = None + speakeroffice: Optional[str] = None + error: Optional[str] = None speech_type: Optional[str] = Field( validation_alias="speech", serialization_alias="speech", default=None ) - person_id: Optional[str] = Field( - pattern=r"uk\.org\.publicwhip/person/\d+$", default=None + person_id: Optional[str] = Field(pattern=person_id_pattern, default=None) + member_id: Optional[Annotated[str, Field(pattern=member_id_pattern)]] = Field( + validation_alias=AliasChoices("speakerid"), + serialization_alias="speakerid", + pattern=member_id_pattern, + default=None, ) colnum: Optional[str] = None time: Optional[str] = None @@ -137,6 +144,8 @@ class DivisionCount(StrictBaseXMLModel, tags=["divisioncount"]): noes: Optional[int] = None neutral: Optional[int] = None absent: Optional[int] = None + tellerayes: Optional[int] = None + tellernoes: Optional[int] = None class MSPName(StrictBaseXMLModel, tags=["mspname"]): @@ -144,6 +153,7 @@ class MSPName(StrictBaseXMLModel, tags=["mspname"]): validation_alias=AliasChoices("person_id", "id"), serialization_alias="id", pattern=person_id_pattern, + default=None, ) # scotland uses id rather than person_id vote: str proxy: Optional[str] = None @@ -153,13 +163,25 @@ class MSPName(StrictBaseXMLModel, tags=["mspname"]): class RepName( StrictBaseXMLModel, tags=["repname", "mpname", "msname", "mlaname", "lord"] ): - person_id: str = Field(pattern=person_id_pattern) + person_id: Optional[str] = Field(pattern=person_id_pattern, default=None) + member_id: Optional[str] = Field( + validation_alias=AliasChoices("id"), + serialization_alias="id", + pattern=member_id_pattern, + default=None, + ) vote: str teller: Optional[str] = None proxy: Optional[str] = None name: TextStr +def 
seperate_out_msp(value: Any) -> str: + if value["@tag"] == "mspname": + return "msp" + return "rep" + + class RepList( StrictBaseXMLModel, tags=["replist", "mplist", "msplist", "mslist", "mlalist", "lordlist"], @@ -180,7 +202,12 @@ class RepList( "abstentions", "didnotvote", ] - items: Items[Union[MSPName, RepName]] + items: Items[ + Annotated[ + Union[Annotated[MSPName, Tag("msp")], Annotated[RepName, Tag("rep")]], + Discriminator(seperate_out_msp), + ] + ] class Motion(StrictBaseXMLModel, tags=["motion"]): @@ -209,6 +236,7 @@ class Division(StrictBaseXMLModel, tags=["division"]): divnumber: int colnum: Optional[int] = None time: Optional[str] = None + url: Optional[str] = None count: AsAttrSingle[Optional[DivisionCount]] rel_motions: AsAttr[list[Motion]] = [] representatives: Items[RepList] @@ -230,7 +258,9 @@ class Transcript(StrictBaseXMLModel, tags=["publicwhip"]): TranscriptType: ClassVar[Type[TranscriptType]] = TranscriptType scraper_version: Optional[str] = Field( default=None, - validation_alias=AliasChoices("scraper_version", "scraperversion"), + validation_alias=AliasChoices( + "scraper_version", "scraperversion", "scrapeversion" + ), serialization_alias="scraperversion", ) latest: Optional[str] = Field(default=None) diff --git a/tests/test_transcript.py b/tests/test_transcript.py index 6b4a1e6..901d0e9 100644 --- a/tests/test_transcript.py +++ b/tests/test_transcript.py @@ -1,3 +1,4 @@ +from datetime import date from pathlib import Path from mysoc_validator.models.transcripts import Transcript @@ -7,6 +8,14 @@ def test_transcript_load(): Transcript.from_xml_path(Path("data", "debates2023-03-28d.xml")) +def test_transcript_date(): + Transcript.from_parlparse( + date(2015, 1, 20), + chamber=Transcript.Chamber.COMMONS, + transcript_type=Transcript.TranscriptType.DEBATES, + ) + + def test_transcript_round_trip(): t = Transcript.from_xml_path(Path("data", "debates2023-03-28d.xml"))