diff --git a/openfoodfacts_exports/exports/parquet/common.py b/openfoodfacts_exports/exports/parquet/common.py index 45e4dcb..088fc82 100644 --- a/openfoodfacts_exports/exports/parquet/common.py +++ b/openfoodfacts_exports/exports/parquet/common.py @@ -23,6 +23,7 @@ class Image(BaseModel): sizes: dict[str, ImageSize] | None = None uploaded_t: int | None = None imgid: int | None = None + rev: int | None = None uploader: str | None = None @model_validator(mode="after") @@ -242,13 +243,13 @@ def parse_language_fields(cls, data: dict) -> dict: @classmethod def parse_images(cls, data: dict) -> dict: """Parse images field into a list of dictionaries with `key`, `imgid`, - `sizes`, `uploaded_t`, and `uploader` keys. + `rev`, `sizes`, `uploaded_t`, and `uploader` keys. In Open Food Facts, images are stored as a dictionary with the image key as the key and the image data as the value. To make the schema compatible with Parquet, we convert these fields - into a list of dictionaries with `key`, `imgid`, `sizes`, `uploaded_t`, + into a list of dictionaries with `key`, `imgid`, `rev`, `sizes`, `uploaded_t`, and `uploader` keys. We copy the image key (ex: `3`, `nutrition_fr`,...) from the original dictionary and add it as a field under the `key` key. """ @@ -283,6 +284,7 @@ def parse_owner_fields(cls, data: dict): [ pa.field("key", pa.string(), nullable=True), pa.field("imgid", pa.int32(), nullable=True), + pa.field("rev", pa.int32(), nullable=True), pa.field( "sizes", pa.struct(