From 74aca2135d105bf9c97681b50a9b0a016a7ea745 Mon Sep 17 00:00:00 2001 From: NielsRogge <48327001+NielsRogge@users.noreply.github.com> Date: Tue, 8 Aug 2023 13:52:16 +0200 Subject: [PATCH] Add int64 dtype (#338) When running the Datacomp pipeline at scale, I encountered an outlier image which had a width of about 37,000 pixels. As the component was using int16 as the dtype of the width column, this failed as it was outside the range. The error suggested to use int64 instead. Then I noticed int64 is not supported yet, hence this PR adds it. --- components/download_images/fondant_component.yaml | 4 ++-- components/filter_image_resolution/fondant_component.yaml | 4 ++-- components/image_cropping/fondant_component.yaml | 4 ++-- components/image_resolution_extraction/fondant_component.yaml | 4 ++-- .../cluster_image_embeddings/fondant_component.yaml | 4 ++-- .../components/load_from_hf_hub/fondant_component.yaml | 4 ++-- src/fondant/schemas/common.json | 1 + .../compiled_pipeline/example_2/docker-compose.yml | 4 ++-- 8 files changed, 15 insertions(+), 14 deletions(-) diff --git a/components/download_images/fondant_component.yaml b/components/download_images/fondant_component.yaml index 1efaa48d4..665ed4912 100644 --- a/components/download_images/fondant_component.yaml +++ b/components/download_images/fondant_component.yaml @@ -14,9 +14,9 @@ produces: data: type: binary width: - type: int16 + type: int32 height: - type: int16 + type: int32 additionalFields: false args: diff --git a/components/filter_image_resolution/fondant_component.yaml b/components/filter_image_resolution/fondant_component.yaml index dcac31145..98e9cea78 100644 --- a/components/filter_image_resolution/fondant_component.yaml +++ b/components/filter_image_resolution/fondant_component.yaml @@ -6,9 +6,9 @@ consumes: image: fields: width: - type: int16 + type: int32 height: - type: int16 + type: int32 args: min_image_dim: diff --git a/components/image_cropping/fondant_component.yaml b/components/image_cropping/fondant_component.yaml index b21a7ae1d..a4fedb9a4 100644 --- a/components/image_cropping/fondant_component.yaml +++ b/components/image_cropping/fondant_component.yaml @@ -14,9 +14,9 @@ produces: data: type: binary width: - type: int16 + type: int32 height: - type: int16 + type: int32 args: cropping_threshold: diff --git a/components/image_resolution_extraction/fondant_component.yaml b/components/image_resolution_extraction/fondant_component.yaml index cc917bad2..e3155ea6e 100644 --- a/components/image_resolution_extraction/fondant_component.yaml +++ b/components/image_resolution_extraction/fondant_component.yaml @@ -14,6 +14,6 @@ produces: data: type: binary width: - type: int16 + type: int32 height: - type: int16 \ No newline at end of file + type: int32 \ No newline at end of file diff --git a/examples/pipelines/datacomp/components/cluster_image_embeddings/fondant_component.yaml b/examples/pipelines/datacomp/components/cluster_image_embeddings/fondant_component.yaml index 7ea5ca294..d42ee5462 100644 --- a/examples/pipelines/datacomp/components/cluster_image_embeddings/fondant_component.yaml +++ b/examples/pipelines/datacomp/components/cluster_image_embeddings/fondant_component.yaml @@ -8,9 +8,9 @@ consumes: url: type: string width: - type: int16 + type: int32 height: - type: int16 + type: int32 face_bboxes: type: array items: diff --git a/examples/pipelines/datacomp/components/load_from_hf_hub/fondant_component.yaml b/examples/pipelines/datacomp/components/load_from_hf_hub/fondant_component.yaml index b89a10324..611b6b886 100644 --- a/examples/pipelines/datacomp/components/load_from_hf_hub/fondant_component.yaml +++ b/examples/pipelines/datacomp/components/load_from_hf_hub/fondant_component.yaml @@ -8,9 +8,9 @@ produces: url: type: string width: - type: int16 + type: int32 height: - type: int16 + type: int32 face_bboxes: type: array items: diff --git a/src/fondant/schemas/common.json b/src/fondant/schemas/common.json index 969ecd1a8..11df4e988 100644 --- a/src/fondant/schemas/common.json +++ b/src/fondant/schemas/common.json @@ -7,6 +7,7 @@ "int8", "int16", "int32", + "int64", "uint8", "uint16", "uint32", diff --git a/tests/example_pipelines/compiled_pipeline/example_2/docker-compose.yml b/tests/example_pipelines/compiled_pipeline/example_2/docker-compose.yml index 53be24617..df29f07f7 100644 --- a/tests/example_pipelines/compiled_pipeline/example_2/docker-compose.yml +++ b/tests/example_pipelines/compiled_pipeline/example_2/docker-compose.yml @@ -40,8 +40,8 @@ services: - '{"name": "Image cropping", "description": "Component that removes single-colored borders around images and crops them appropriately", "image": "ghcr.io/ml6team/image_cropping:dev", "consumes": {"images": {"fields": {"data": {"type": "binary"}}}}, "produces": - {"images": {"fields": {"data": {"type": "binary"}, "width": {"type": "int16"}, - "height": {"type": "int16"}}}}, "args": {"cropping_threshold": {"description": + {"images": {"fields": {"data": {"type": "binary"}, "width": {"type": "int32"}, + "height": {"type": "int32"}}}}, "args": {"cropping_threshold": {"description": "Threshold parameter used for detecting borders. A lower (negative) parameter results in a more performant border detection, but can cause overcropping. Default is -30", "type": "int", "default": -30}, "padding": {"description": "Padding