Skip to content

Commit

Permalink
Expanded validation options for pre-2015 transcripts
Browse files Browse the repository at this point in the history
  • Loading branch information
ajparsons committed Sep 23, 2024
1 parent d611aab commit 08fbaf7
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 8 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

[comment]: # (Template for updates)

## [0.3.2] - 2024-09-23

### Fixed
- Expanded transcript validation options for pre-2015 transcripts.

## [0.3.1] - 2024-09-11

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "mysoc-validator"
version = "0.3.1"
version = "0.3.2"
description = "Pydantic validators for mySociety democracy types"
authors = ["mySociety <[email protected]>"]
readme = "README.md"
Expand Down
2 changes: 1 addition & 1 deletion src/mysoc_validator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@
from .models.popolo import Popolo
from .models.transcripts import Transcript

__version__ = "0.3.1"
__version__ = "0.3.2"

__all__ = ["Popolo", "Transcript", "Register", "__version__"]
42 changes: 36 additions & 6 deletions src/mysoc_validator/models/transcripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@
agreement_gid_pattern = (
r"uk\.org\.publicwhip\/[a-z]+\/\d{4}-\d{2}-\d{2}[a-z]?\.\d+\.\d+\.a\.\d+"
)
person_id_pattern = r"uk\.org\.publicwhip/person/\d+$"
person_id_pattern = r"(uk\.org\.publicwhip/person/\d+$|unknown$)"
member_id_pattern = r"(uk\.org\.publicwhip/member/\d+$|unknown$)"
GIDPattern = Annotated[str, Field(pattern=gid_pattern)]


Expand Down Expand Up @@ -113,11 +114,17 @@ class Speech(StrictBaseXMLModel, tags=["speech"]):
type: str = ""
nospeaker: Optional[str] = None
speakername: Optional[str] = None
speakeroffice: Optional[str] = None
error: Optional[str] = None
speech_type: Optional[str] = Field(
validation_alias="speech", serialization_alias="speech", default=None
)
person_id: Optional[str] = Field(
pattern=r"uk\.org\.publicwhip/person/\d+$", default=None
person_id: Optional[str] = Field(pattern=person_id_pattern, default=None)
member_id: Optional[Annotated[str, Field(pattern=member_id_pattern)]] = Field(
validation_alias=AliasChoices("speakerid"),
serialization_alias="speakerid",
pattern=member_id_pattern,
default=None,
)
colnum: Optional[str] = None
time: Optional[str] = None
Expand All @@ -137,13 +144,16 @@ class DivisionCount(StrictBaseXMLModel, tags=["divisioncount"]):
noes: Optional[int] = None
neutral: Optional[int] = None
absent: Optional[int] = None
tellerayes: Optional[int] = None
tellernoes: Optional[int] = None


class MSPName(StrictBaseXMLModel, tags=["mspname"]):
person_id: str = Field(
validation_alias=AliasChoices("person_id", "id"),
serialization_alias="id",
pattern=person_id_pattern,
default=None,
) # scotland uses id rather than person_id
vote: str
proxy: Optional[str] = None
Expand All @@ -153,13 +163,25 @@ class MSPName(StrictBaseXMLModel, tags=["mspname"]):
class RepName(
StrictBaseXMLModel, tags=["repname", "mpname", "msname", "mlaname", "lord"]
):
person_id: str = Field(pattern=person_id_pattern)
person_id: Optional[str] = Field(pattern=person_id_pattern, default=None)
member_id: Optional[str] = Field(
validation_alias=AliasChoices("id"),
serialization_alias="id",
pattern=member_id_pattern,
default=None,
)
vote: str
teller: Optional[str] = None
proxy: Optional[str] = None
name: TextStr


def seperate_out_msp(value: Any) -> str:
    """
    Choose the union tag for one representative entry.

    Looks up the "@tag" key of *value* and returns "msp" when it equals
    "mspname"; any other tag value returns "rep".
    """
    is_msp = value["@tag"] == "mspname"
    return "msp" if is_msp else "rep"


class RepList(
StrictBaseXMLModel,
tags=["replist", "mplist", "msplist", "mslist", "mlalist", "lordlist"],
Expand All @@ -180,7 +202,12 @@ class RepList(
"abstentions",
"didnotvote",
]
items: Items[Union[MSPName, RepName]]
items: Items[
Annotated[
Union[Annotated[MSPName, Tag("msp")], Annotated[RepName, Tag("rep")]],
Discriminator(seperate_out_msp),
]
]


class Motion(StrictBaseXMLModel, tags=["motion"]):
Expand Down Expand Up @@ -209,6 +236,7 @@ class Division(StrictBaseXMLModel, tags=["division"]):
divnumber: int
colnum: Optional[int] = None
time: Optional[str] = None
url: Optional[str] = None
count: AsAttrSingle[Optional[DivisionCount]]
rel_motions: AsAttr[list[Motion]] = []
representatives: Items[RepList]
Expand All @@ -230,7 +258,9 @@ class Transcript(StrictBaseXMLModel, tags=["publicwhip"]):
TranscriptType: ClassVar[Type[TranscriptType]] = TranscriptType
scraper_version: Optional[str] = Field(
default=None,
validation_alias=AliasChoices("scraper_version", "scraperversion"),
validation_alias=AliasChoices(
"scraper_version", "scraperversion", "scrapeversion"
),
serialization_alias="scraperversion",
)
latest: Optional[str] = Field(default=None)
Expand Down
9 changes: 9 additions & 0 deletions tests/test_transcript.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from datetime import date
from pathlib import Path

from mysoc_validator.models.transcripts import Transcript
Expand All @@ -7,6 +8,14 @@ def test_transcript_load():
Transcript.from_xml_path(Path("data", "debates2023-03-28d.xml"))


def test_transcript_date():
    """Call Transcript.from_parlparse for Commons debates on 2015-01-20.

    There is no explicit assertion: the test passes when the call
    completes without raising.
    """
    sitting_day = date(2015, 1, 20)
    commons = Transcript.Chamber.COMMONS
    debates = Transcript.TranscriptType.DEBATES
    Transcript.from_parlparse(
        sitting_day,
        chamber=commons,
        transcript_type=debates,
    )


def test_transcript_round_trip():
t = Transcript.from_xml_path(Path("data", "debates2023-03-28d.xml"))

Expand Down

0 comments on commit 08fbaf7

Please sign in to comment.