From 29c92a1f7f718ee6eb1bb748b008ab8111c487bb Mon Sep 17 00:00:00 2001 From: felix Date: Mon, 8 Jul 2024 14:47:07 +0200 Subject: [PATCH] move to pil --- onnxtr/models/preprocessor/base.py | 14 ++--- onnxtr/transforms/base.py | 79 ++++++++++-------------- tests/common/test_models_preprocessor.py | 2 +- tests/common/test_transforms.py | 14 ++--- 4 files changed, 48 insertions(+), 61 deletions(-) diff --git a/onnxtr/models/preprocessor/base.py b/onnxtr/models/preprocessor/base.py index 13c851e..b4eed38 100644 --- a/onnxtr/models/preprocessor/base.py +++ b/onnxtr/models/preprocessor/base.py @@ -67,11 +67,12 @@ def sample_transforms(self, x: np.ndarray) -> np.ndarray: if x.dtype not in (np.uint8, np.float32): raise TypeError("unsupported data type for numpy.ndarray") x = shape_translate(x, "HWC") - # Data type & 255 division - if x.dtype == np.uint8: - x = x.astype(np.float32) / 255.0 + # Resizing x = self.resize(x) + # Data type & 255 division + if x.dtype == np.uint8 or np.max(x) > 1: + x = x.astype(np.float32) / 255.0 return x @@ -95,13 +96,12 @@ def __call__(self, x: Union[np.ndarray, List[np.ndarray]]) -> List[np.ndarray]: raise TypeError("unsupported data type for numpy.ndarray") x = shape_translate(x, "BHWC") - # Data type & 255 division - if x.dtype == np.uint8: - x = x.astype(np.float32) / 255.0 # Resizing if (x.shape[1], x.shape[2]) != self.resize.output_size: x = np.array([self.resize(sample) for sample in x]) - + # Data type & 255 division + if x.dtype == np.uint8 or np.max(x) > 1: + x = x.astype(np.float32) / 255.0 batches = [x] elif isinstance(x, list) and all(isinstance(sample, np.ndarray) for sample in x): diff --git a/onnxtr/transforms/base.py b/onnxtr/transforms/base.py index a226338..2e7203b 100644 --- a/onnxtr/transforms/base.py +++ b/onnxtr/transforms/base.py @@ -5,8 +5,8 @@ from typing import Tuple, Union -import cv2 import numpy as np +from PIL import Image, ImageOps __all__ = ["Resize", "Normalize"] @@ -17,64 +17,51 @@ class Resize: def __init__( self, size: Union[int, Tuple[int, int]], - interpolation=cv2.INTER_LINEAR, + interpolation=Image.Resampling.BILINEAR, preserve_aspect_ratio: bool = False, symmetric_pad: bool = False, ) -> None: - super().__init__() - self.size = size + self.size = size if isinstance(size, tuple) else (size, size) self.interpolation = interpolation self.preserve_aspect_ratio = preserve_aspect_ratio self.symmetric_pad = symmetric_pad self.output_size = size if isinstance(size, tuple) else (size, size) - if not isinstance(self.size, (int, tuple, list)): - raise AssertionError("size should be either a tuple, a list or an int") + if not isinstance(self.size, (tuple, int)): + raise AssertionError("size should be either a tuple or an int") - def __call__( - self, - img: np.ndarray, - ) -> np.ndarray: - if img.ndim == 3: - h, w = img.shape[0:2] - else: - h, w = img.shape[1:3] - sh, sw = self.size if isinstance(self.size, tuple) else (self.size, self.size) + def __call__(self, img: np.ndarray) -> np.ndarray: + img = (img * 255).astype(np.uint8) if img.dtype != np.uint8 else img + h, w = img.shape[:2] if img.ndim == 3 else img.shape[1:3] + sh, sw = self.size - # Calculate aspect ratio of the image - aspect = w / h + if not self.preserve_aspect_ratio: + return np.array(Image.fromarray(img).resize((sw, sh), resample=self.interpolation)) - # Compute scaling and padding sizes - if self.preserve_aspect_ratio: - if aspect > 1: # Horizontal image - new_w = sw - new_h = int(sw / aspect) - elif aspect < 1: # Vertical image - new_h = sh - new_w = int(sh * aspect) - else: # Square image - new_h, new_w = sh, sw - - img_resized = cv2.resize(img, (new_w, new_h), interpolation=self.interpolation) - - # Calculate padding - pad_top = max((sh - new_h) // 2, 0) - pad_bottom = max(sh - new_h - pad_top, 0) - pad_left = max((sw - new_w) // 2, 0) - pad_right = max(sw - new_w - pad_left, 0) - - # Pad the image - img_resized = cv2.copyMakeBorder( # type: ignore[call-overload] - img_resized, pad_top, pad_bottom, pad_left, pad_right, borderType=cv2.BORDER_CONSTANT, value=0 - ) - - # Ensure the image matches the target size by resizing it again if needed - img_resized = cv2.resize(img_resized, (sw, sh), interpolation=self.interpolation) + actual_ratio = h / w + target_ratio = sh / sw + + if target_ratio == actual_ratio: + return np.array(Image.fromarray(img).resize((sw, sh), resample=self.interpolation)) + + if actual_ratio > target_ratio: + tmp_size = (int(sh / actual_ratio), sh) else: - # Resize the image without preserving aspect ratio - img_resized = cv2.resize(img, (sw, sh), interpolation=self.interpolation) + tmp_size = (sw, int(sw * actual_ratio)) + + img_resized = Image.fromarray(img).resize(tmp_size, resample=self.interpolation) + pad_left = pad_top = 0 + pad_right = sw - img_resized.width + pad_bottom = sh - img_resized.height + + if self.symmetric_pad: + pad_left = pad_right // 2 + pad_right -= pad_left + pad_top = pad_bottom // 2 + pad_bottom -= pad_top - return img_resized + img_resized = ImageOps.expand(img_resized, (pad_left, pad_top, pad_right, pad_bottom)) + return np.array(img_resized) def __repr__(self) -> str: interpolate_str = self.interpolation diff --git a/tests/common/test_models_preprocessor.py b/tests/common/test_models_preprocessor.py index 68e3c53..cd01011 100644 --- a/tests/common/test_models_preprocessor.py +++ b/tests/common/test_models_preprocessor.py @@ -35,5 +35,5 @@ def test_preprocessor(batch_size, output_size, input_tensor, expected_batches, e assert all(isinstance(b, np.ndarray) for b in out) assert all(b.dtype == np.float32 for b in out) assert all(b.shape[1:3] == output_size for b in out) - assert all(np.all(np.abs(b - expected_value) < 1e-6) for b in out) + assert all(np.all(b == expected_value) for b in out) assert len(repr(processor).split("\n")) == 4 diff --git a/tests/common/test_transforms.py b/tests/common/test_transforms.py index 6d84d0c..fc2d96a 100644 --- a/tests/common/test_transforms.py +++ b/tests/common/test_transforms.py @@ -10,23 +10,23 @@ def test_resize(): input_t = np.ones((64, 64, 3), dtype=np.float32) out = transfo(input_t) - assert np.all(out == 1) + assert np.all(out == 255) assert out.shape[:2] == output_size - assert repr(transfo) == f"Resize(output_size={output_size}, interpolation='1')" + assert repr(transfo) == f"Resize(output_size={output_size}, interpolation='2')" transfo = Resize(output_size, preserve_aspect_ratio=True) input_t = np.ones((32, 64, 3), dtype=np.float32) out = transfo(input_t) assert out.shape[:2] == output_size - assert not np.all(out == 1) + assert not np.all(out == 255) # Asymetric padding - assert np.all(out[-1] == 0) and np.all(out[0] == 0) + assert np.all(out[-1] == 0) and np.all(out[0] == 255) # Symetric padding transfo = Resize(output_size, preserve_aspect_ratio=True, symmetric_pad=True) assert repr(transfo) == ( - f"Resize(output_size={output_size}, interpolation='1', " f"preserve_aspect_ratio=True, symmetric_pad=True)" + f"Resize(output_size={output_size}, interpolation='2', " f"preserve_aspect_ratio=True, symmetric_pad=True)" ) out = transfo(input_t) assert out.shape[:2] == output_size @@ -34,7 +34,7 @@ def test_resize(): assert np.all(out[-1] == 0) and np.all(out[0] == 0) # Inverse aspect ratio - input_t = np.ones((3, 64, 32), dtype=np.float32) + input_t = np.ones((64, 32, 3), dtype=np.float32) out = transfo(input_t) assert not np.all(out == 1) @@ -43,7 +43,7 @@ def test_resize(): # Same aspect ratio output_size = (32, 128) transfo = Resize(output_size, preserve_aspect_ratio=True) - out = transfo(np.ones((3, 16, 64), dtype=np.float32)) + out = transfo(np.ones((16, 64, 3), dtype=np.float32)) assert out.shape[:2] == output_size