-
-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Generate CSV from parquet files #19
Comments
@jeremyarancio I know you are working on this, but I created the issue anyway :-)
Conversion is in progress! I just started on the image_url fields in the CSV, and I noticed that "rev" is missing from the Parquet dataset. Example product:
code = 4061461479824
image_url = https://images.openfoodfacts.org/images/products/406/146/147/9824/front_de.37.400.jpg
image_small_url = https://images.openfoodfacts.org/images/products/406/146/147/9824/front_de.37.200.jpg
image_ingredients_url = https://images.openfoodfacts.org/images/products/406/146/147/9824/ingredients_de.9.400.jpg
image_ingredients_small_url = https://images.openfoodfacts.org/images/products/406/146/147/9824/ingredients_de.9.200.jpg
image_nutrition_url = https://images.openfoodfacts.org/images/products/406/146/147/9824/nutrition_de.36.400.jpg
image_nutrition_small_url = https://images.openfoodfacts.org/images/products/406/146/147/9824/nutrition_de.36.200.jpg
A PR to the Parquet export will be added to fix it. For more info about image URLs, check the doc.
From the JSONL:
rev = 40
images = {
"1": {
"uploader": "kiliweb",
"uploaded_t": 1628238343,
"sizes": {
"100": {"w": 50, "h": 100},
"full": {"h": 1200, "w": 601},
"400": {"h": 400, "w": 200},
"200": null
},
"x1": null,
"angle": null,
"normalize": null,
"coordinates_image_size": null,
"imgid": null,
"y1": null,
"x2": null,
"geometry": null,
"y2": null,
"white_magic": null,
"rev": null,
"ocr": null,
"orientation": null
},
"packaging_de": {
"uploader": null,
"uploaded_t": null,
"sizes": {
"100": {"w": 100, "h": 71},
"full": {"h": 848, "w": 1190},
"400": {"h": 285, "w": 400},
"200": {"w": 200, "h": 143}
},
"x1": "-1",
"angle": 0,
"normalize": null,
"coordinates_image_size": "full",
"imgid": "4",
"y1": "-1",
"x2": "-1",
"geometry": "0x0--1--1",
"y2": "-1",
"white_magic": null,
"rev": "12",
"ocr": null,
"orientation": null
},
"front_de": {
"uploader": null,
"uploaded_t": null,
"sizes": {
"100": {"w": 50, "h": 100},
"full": {"h": 1200, "w": 601},
"400": {"h": 400, "w": 200},
"200": {"w": 100, "h": 200}
},
"x1": "0",
"angle": "0",
"normalize": null,
"coordinates_image_size": "full",
"imgid": "1",
"y1": "0",
"x2": "601",
"geometry": "601x1200-0-0",
"y2": "1200",
"white_magic": null,
"rev": "37",
"ocr": null,
"orientation": null
},
"2": {
"uploader": "kiliweb",
"uploaded_t": 1628238343,
"sizes": {
"100": {"w": 70, "h": 100},
"full": {"h": 1200, "w": 839},
"400": {"h": 400, "w": 280},
"200": null
},
"x1": null,
"angle": null,
"normalize": null,
"coordinates_image_size": null,
"imgid": null,
"y1": null,
"x2": null,
"geometry": null,
"y2": null,
"white_magic": null,
"rev": null,
"ocr": null,
"orientation": null
},
"ingredients_de": {
"uploader": null,
"uploaded_t": null,
"sizes": {
"100": {"w": 100, "h": 18},
"full": {"h": 536, "w": 3024},
"400": {"h": 71, "w": 400},
"200": {"w": 200, "h": 35}
},
"x1": null,
"angle": null,
"normalize": null,
"coordinates_image_size": "400",
"imgid": "3",
"y1": null,
"x2": null,
"geometry": "0x0-0-0",
"y2": null,
"white_magic": null,
"rev": "9",
"ocr": null,
"orientation": null
},
"3": {
"uploader": "gehrmaja",
"uploaded_t": 1628588851,
"sizes": {
"100": {"w": 100, "h": 18},
"full": {"h": 536, "w": 3024},
"400": {"h": 71, "w": 400},
"200": null
},
"x1": null,
"angle": null,
"normalize": null,
"coordinates_image_size": null,
"imgid": null,
"y1": null,
"x2": null,
"geometry": null,
"y2": null,
"white_magic": null,
"rev": null,
"ocr": null,
"orientation": null
},
"6": {
"uploader": "femmenoire",
"uploaded_t": 1701512789,
"sizes": {
"100": {"w": 100, "h": 88},
"full": {"h": 1117, "w": 1274},
"400": {"h": 351, "w": 400},
"200": null
},
"x1": null,
"angle": null,
"normalize": null,
"coordinates_image_size": null,
"imgid": null,
"y1": null,
"x2": null,
"geometry": null,
"y2": null,
"white_magic": null,
"rev": null,
"ocr": null,
"orientation": null
},
"5": {
"uploader": "prepperapp",
"uploaded_t": 1690095501,
"sizes": {
"100": {"w": 100, "h": 100},
"full": {"h": 800, "w": 800},
"400": {"h": 400, "w": 400},
"200": null
},
"x1": null,
"angle": null,
"normalize": null,
"coordinates_image_size": null,
"imgid": null,
"y1": null,
"x2": null,
"geometry": null,
"y2": null,
"white_magic": null,
"rev": null,
"ocr": null,
"orientation": null
},
"4": {
"uploader": "gehrmaja",
"uploaded_t": 1628588949,
"sizes": {
"100": {"w": 100, "h": 71},
"full": {"h": 848, "w": 1190},
"400": {"h": 285, "w": 400},
"200": null
},
"x1": null,
"angle": null,
"normalize": null,
"coordinates_image_size": null,
"imgid": null,
"y1": null,
"x2": null,
"geometry": null,
"y2": null,
"white_magic": null,
"rev": null,
"ocr": null,
"orientation": null
},
"nutrition_de": {
"uploader": null,
"uploaded_t": null,
"sizes": {
"100": {"w": 100, "h": 88},
"full": {"h": 1117, "w": 1274},
"400": {"h": 351, "w": 400},
"200": {"w": 200, "h": 175}
},
"x1": "-1",
"angle": 0,
"normalize": null,
"coordinates_image_size": "full",
"imgid": "6",
"y1": "-1",
"x2": "-1",
"geometry": "0x0--1--1",
"y2": "-1",
"white_magic": null,
"rev": "36",
"ocr": null,
"orientation": null
}
} |
We want to avoid doing all the work of generating the CSV on the Product Opener instance.
The text was updated successfully, but these errors were encountered: