Added support for 3-band float32 TIFF images

airctic · Aug 27, 2022 · 2dc69c1 · 2dc69c1
1 parent 72d30c9
commit 2dc69c1
Show file tree

Hide file tree

Showing 34 changed files with 407 additions and 511 deletions.
diff --git a/environment.yml b/environment.yml
@@ -16,4 +16,5 @@ dependencies:
       - icevision[all]==0.12.0
       - --find-links https://download.openmmlab.com/mmcv/dist/cu102/torch1.10.0/index.html
       - mmcv-full==1.3.17
-      - mmdet==2.17.0
+      - mmdet==2.17.0
+      - rasterio>=1.3.0, <2
diff --git a/icevision/core/mask.py b/icevision/core/mask.py
@@ -176,7 +176,7 @@ def __init__(self, filepath: Union[str, Path], drop_void: bool = True):
         self.drop_void = drop_void
 
     def to_mask(self, h, w) -> MaskArray:
-        mask_arr = np.array(Image.open(self.filepath))
+        mask_arr = np.array(open_img(self.filepath, ensure_no_data_convert=True))
         obj_ids = np.unique(mask_arr)[1:]
         masks = mask_arr == obj_ids[:, None, None]
 

diff --git a/icevision/core/record_components.py b/icevision/core/record_components.py
@@ -115,13 +115,8 @@ def __init__(self, task=tasks.common):
     def set_img(self, img: Union[PIL.Image.Image, np.ndarray]):
         assert isinstance(img, (PIL.Image.Image, np.ndarray))
         self.img = img
-        if isinstance(img, PIL.Image.Image):
-            width, height = img.size
-        elif isinstance(img, np.ndarray):
-            # else:
-            height, width, _ = self.img.shape
-        # this should set on SizeRecordComponent
-        self.composite.set_img_size(ImgSize(width=width, height=height), original=True)
+        img_size = get_img_size_from_data(img)
+        self.composite.set_img_size(img_size, original=True)
 
     def _repr(self) -> List[str]:
         if self.img is not None:

diff --git a/icevision/data/convert_records_to_fo.py b/icevision/data/convert_records_to_fo.py
@@ -13,6 +13,7 @@
 from icevision.data.prediction import Prediction
 from icevision.models.inference import postprocess_bbox
 from icevision.soft_dependencies import SoftDependencies
+from icevision.utils.imageio import get_img_size
 
 from typing import Any, Callable, Union, Iterable, List, Tuple
 
@@ -93,30 +94,31 @@ def convert_record_to_fo_sample(
             _internal_filepath = record.common.filepath
 
     # Prepare undo bbox tfms fn
-    img = Image.open(_internal_filepath)
+    img_size = get_img_size(_internal_filepath)
     if undo_bbox_tfms_fn is not None:
         _internal_undo_bbox_tfms = lambda bbox: _convert_bbox_to_fo_bbox(
-            undo_bbox_tfms_fn(bbox), img.width, img.height
+            undo_bbox_tfms_fn(bbox), img_size.width, img_size.height
         )
     elif transformations is not None:
         _internal_undo_bbox_tfms = lambda bbox: _convert_bbox_to_fo_bbox(
             postprocess_bbox(
-                img, bbox, transformations, record.common.width, record.common.height
+                img_size,
+                bbox,
+                transformations,
+                record.common.width,
+                record.common.height,
             ),
-            img.width,
-            img.height,
+            img_size.width,
+            img_size.height,
         )
     else:
         _internal_undo_bbox_tfms = lambda bbox: _convert_bbox_to_fo_bbox(
-            bbox, img.width, img.height
+            bbox, img_size.width, img_size.height
         )
 
     # Get fo.Detections
     detections = record_to_fo_detections(record, _internal_undo_bbox_tfms)
 
-    # Unload image to save RAM
-    img.close()
-
     # Get sample after successful detection
     if sample is None:
         sample = Sample(_internal_filepath)

diff --git a/icevision/imports.py b/icevision/imports.py
@@ -1,6 +1,7 @@
 import sys, os, re, shutil, typing, itertools, operator, math, warnings, json, random
 import functools, io, cv2, mimetypes, torch, torchvision, dataclasses, zipfile, pickle
 import PIL
+import rasterio
 from copy import copy, deepcopy
 
 from pdb import set_trace

diff --git a/icevision/models/inference.py b/icevision/models/inference.py
@@ -11,12 +11,13 @@
 from icevision.utils.imageio import *
 from icevision.visualize.draw_data import *
 from icevision.visualize.utils import *
+from icevision.utils.imageio import get_img_size_from_data
 
 DEFAULT_FONT_PATH = get_default_font()
 
 
 def _end2end_detect(
-    img: Union[PIL.Image.Image, Path, str],
+    img: Union[PIL.Image.Image, np.ndarray, Path, str],
     transforms: albumentations_adapter.Adapter,
     model: torch.nn.Module,
     class_map: ClassMap,
@@ -37,7 +38,7 @@ def _end2end_detect(
 
     Parameters
     ----------
-    img: image to run inference on. It can be a string, Path or PIL.Image
+    img: image to run inference on. It can be a string, Path, PIL.Image or numpy array
     transforms: icevision albumentations transforms
     model: model to run inference with
     class_map: ClassMap with the available categories
@@ -59,7 +60,7 @@ def _end2end_detect(
                    Bounding boxes are adjusted to the original image size and aspect ratio
     """
     if isinstance(img, (str, Path)):
-        img = PIL.Image.open(Path(img))
+        img = open_img(str(img), ensure_no_data_convert=True)
 
     infer_ds = Dataset.from_images([np.array(img)], transforms, class_map=class_map)
     pred = predict_fn(model, infer_ds, detection_threshold=detection_threshold)[0]
@@ -84,8 +85,8 @@ def _end2end_detect(
     else:
         record._unload()
 
-    w, h = img.shape
-    record.set_img_size(ImgSize(width=w, height=h))
+    img_size = get_img_size_from_data(img)
+    record.set_img_size(img_size)
 
     pred_dict = record.as_dict()
 
@@ -95,8 +96,8 @@ def _end2end_detect(
     else:
         pred_dict["img"] = None
 
-    pred_dict["width"] = w
-    pred_dict["height"] = h
+    pred_dict["width"] = img_size.width
+    pred_dict["height"] = img_size.height
     # delete the `common` key that holds both the `img` and its shape
     del pred_dict["common"]
 
@@ -107,7 +108,7 @@ def _end2end_detect(
 
 def process_bbox_predictions(
     pred: Prediction,
-    img: PIL.Image.Image,
+    img: Union[PIL.Image.Image, np.ndarray],
     transforms: List[Any],
 ) -> List[Dict[str, Any]]:
     """
@@ -123,14 +124,16 @@ def process_bbox_predictions(
     -------
     List of dicts with class, score and bbox coordinates
     """
+    img = np.array(img)
     bboxes = []
     for bbox, score, label in zip(
         pred.pred.detection.bboxes,
         pred.pred.detection.scores,
         pred.pred.detection.labels,
     ):
+        img_size = get_img_size_from_data(img)
         xmin, ymin, xmax, ymax = postprocess_bbox(
-            img, bbox, transforms, pred.pred.height, pred.pred.width
+            img_size, bbox, transforms, pred.pred.height, pred.pred.width
         )
 
         bbox = BBox.from_xyxy(xmin, ymin, xmax, ymax)
@@ -142,14 +145,14 @@ def process_bbox_predictions(
 
 
 def postprocess_bbox(
-    img: PIL.Image.Image, bbox: BBox, transforms: List[Any], h_after: int, w_after: int
+    img_size: ImgSize, bbox: BBox, transforms: List[Any], h_after: int, w_after: int
 ) -> Tuple[int, int, int, int]:
     """
     Post-process predicted bbox to adjust coordinates to input image size.
 
     Parameters
     ----------
-    img: original image, before any model-pre-processing done
+    img_size: original image size, before any model-pre-processing done
     bbox: predicted bbox
     transforms: list of model-pre-processing transforms
     h_after: height of image after model-pre-processing transforms
@@ -159,11 +162,10 @@ def postprocess_bbox(
     -------
     Tuple with (xmin, ymin, xmax, ymax) rescaled and re-adjusted to match the original image size
     """
-    w_before, h_before = img.size
-    h_after, w_after = get_size_without_padding(transforms, img, h_after, w_after)
+    h_after, w_after = get_size_without_padding(transforms, img_size, h_after, w_after)
     pad = np.abs(h_after - w_after) // 2
 
-    h_scale, w_scale = h_after / h_before, w_after / w_before
+    h_scale, w_scale = h_after / img_size.height, w_after / img_size.width
     if h_after < w_after:
         xmin, xmax, ymin, ymax = (
             int(bbox.xmin),
@@ -204,15 +206,17 @@ def draw_img_and_boxes(
     label_border_color: Union[np.array, list, tuple, str] = (255, 255, 0),
 ) -> PIL.Image.Image:
 
-    if not isinstance(img, PIL.Image.Image):
-        img = np.array(img)
+    img = np.array(img)
 
     # convert dict to record
     record = ObjectDetectionRecord()
-    w, h = img.shape
-    record.img = np.array(img)
-    record.set_img_size(ImgSize(width=w, height=h))
+    record.img = img
+
+    img_size = get_img_size_from_data(img)
+    record.set_img_size(img_size)
+
     record.detection.set_class_map(class_map)
+
     for bbox in bboxes:
         record.detection.add_bboxes([BBox.from_xyxy(*bbox["bbox"])])
         record.detection.add_labels([bbox["class"]])

diff --git a/icevision/models/inference_sahi.py b/icevision/models/inference_sahi.py
@@ -73,7 +73,7 @@ def category_names(self):
 
     def get_sliced_prediction(
         self,
-        image: Union[PIL.Image.Image, Path, str],
+        image: Union[PIL.Image.Image, np.ndarray, Path, str],
         keep_sahi_format: bool = False,
         display_label: bool = True,
         display_bbox: bool = True,
@@ -117,11 +117,13 @@ def get_sliced_prediction(
             record.detection.add_bboxes(bboxes)
             record.detection.set_scores(np.array(scores))
 
+            img_path = str(image)
             if isinstance(image, (str, Path)):
-                image = PIL.Image.open(Path(image))
+                image = open_img(img_path, ensure_no_data_convert=True)
 
             record.set_img(image)
-            w, h = image.shape
+
+            img_size = get_img_size(img_path)
 
             if return_img:
                 pred_img = draw_record(
@@ -144,8 +146,8 @@ def get_sliced_prediction(
             else:
                 pred_dict["img"] = None
 
-            pred_dict["width"] = w
-            pred_dict["height"] = h
+            pred_dict["width"] = img_size.width
+            pred_dict["height"] = img_size.height
 
             del pred_dict["common"]
 

diff --git a/icevision/tfms/albumentations/albumentations_adapter.py b/icevision/tfms/albumentations/albumentations_adapter.py
@@ -303,9 +303,9 @@ def apply(self, record):
         self._albu_out = tfms(**self._albu_in)
 
         # store additional info (might be used by components on `collect`)
-        height, width, _ = self._albu_out["image"].shape
+        height, width, *_ = self._albu_out["image"].shape
         height, width = get_size_without_padding(
-            self.tfms_list, record.img, height, width
+            self.tfms_list, ImgSize(width=width, height=height), height, width
         )
         self._size_no_padding = ImgSize(width=width, height=height)
 

diff --git a/icevision/tfms/albumentations/albumentations_helpers.py b/icevision/tfms/albumentations/albumentations_helpers.py
@@ -4,6 +4,7 @@
 
 from icevision.imports import *
 from icevision.core import *
+from icevision.utils.imageio import ImgSize
 
 
 def resize(size, ratio_resize=A.LongestMaxSize):
@@ -86,7 +87,7 @@ def aug_tfms(
 
 
 def get_size_without_padding(
-    tfms_list: List[Any], before_tfm_img: PIL.Image.Image, height: int, width: int
+    tfms_list: List[Any], before_tfm_img_size: ImgSize, height: int, width: int
 ) -> Tuple[int, int]:
     """
     Infer the height and width of the pre-processed image after removing padding.
@@ -95,7 +96,7 @@ def get_size_without_padding(
     ----------
     tfms_list: list of albumentations transforms applied to the `before_tfm_img` image
                 before passing it to the model for inference.
-    before_tfm_img: original image before being pre-processed for inference.
+    before_tfm_img_size: original image size before being pre-processed for inference.
     height: height of output image from icevision `predict` function.
     width: width of output image from icevision `predict` function.
 
@@ -104,17 +105,20 @@ def get_size_without_padding(
     height and width of the image coming out of the inference pipeline, after removing padding
     """
     if get_transform(tfms_list, "Pad") is not None:
-        before_pad_h, before_pad_w, _ = np.array(before_tfm_img).shape
 
         t = get_transform(tfms_list, "SmallestMaxSize")
         if t is not None:
             presize = t.max_size
-            height, width = func_max_size(before_pad_h, before_pad_w, presize, min)
+            height, width = func_max_size(
+                before_tfm_img_size.height, before_tfm_img_size.width, presize, min
+            )
 
         t = get_transform(tfms_list, "LongestMaxSize")
         if t is not None:
             size = t.max_size
-            height, width = func_max_size(before_pad_h, before_pad_w, size, max)
+            height, width = func_max_size(
+                before_tfm_img_size.height, before_tfm_img_size.width, size, max
+            )
 
     return height, width