keras-team · tanzhenyu · Jun 16, 2020 · Jun 16, 2020 · Jun 16, 2020 · Jun 19, 2020
diff --git a/kerascv/layers/ssd_box_coder.py b/kerascv/layers/ssd_box_coder.py
@@ -0,0 +1,125 @@
+import tensorflow as tf
+
+
+class SSDBoxCoder(tf.keras.layers.Layer):
+    """Encodes boxes against anchors in the SSD format, or decodes them back.
+
+    Mathematically, the encoding result is:
+        ty = (cy_gt - cy_a) / height_a
+        tx = (cx_gt - cx_a) / width_a
+        th = log(height_gt / height_a)
+        tw = log(width_gt / width_a)
+
+    where cy, cx, height, width are the center along the height axis, the
+    center along the width axis, the height and the width respectively;
+    subscript `gt` denotes the ground truth box and `a` denotes the anchor.
+
+    The `boxes` must have the same shape as `anchors`, typically obtained by
+    assigning `ground_truth_boxes` to anchors (argmax, bipartite matching).
+
+    # Attributes:
+        center_variances: Sequence of 2 floats. When encoding, [ty, tx] are
+            divided by them; when decoding, multiplied. Defaults to `None`
+            (no variance applied); must be set together with
+            `size_variances`. The SSD paper uses [.1, .1].
+        size_variances: Sequence of 2 floats. When encoding, [th, tw] are
+            divided by them; when decoding, multiplied. Defaults to `None`
+            (no variance applied); must be set together with
+            `center_variances`. The SSD paper uses [.2, .2].
+        invert: Boolean selecting the direction. If False, `boxes` are encoded
+            from [y_min, x_min, y_max, x_max] to [ty, tx, th, tw]. If True,
+            `boxes` are decoded from [ty, tx, th, tw] back to
+            [y_min, x_min, y_max, x_max]. Defaults to `False`.
+
+    # References
+        [Wei Liu et al., 2015](https://arxiv.org/abs/1512.02325)
+    """
+
+    def __init__(
+        self,
+        center_variances=None,
+        size_variances=None,
+        invert=False,
+        name=None,
+        **kwargs
+    ):
+        # The two variances are only meaningful together: reject one-sided use.
+        if (center_variances is None) != (size_variances is None):
+            raise ValueError(
+                "`center_variances` and `size_variances` should both be None or "
+                "tuple of floats, got {}, {}".format(center_variances, size_variances)
+            )
+        # Initialize the base layer before assigning any attribute on `self`.
+        super(SSDBoxCoder, self).__init__(name=name, **kwargs)
+        self.center_variances = center_variances
+        self.size_variances = size_variances
+        self.invert = invert
+
+    @staticmethod
+    def _corners_to_centroids(box_tensor):
+        """Converts [y_min, x_min, y_max, x_max] to (cy, cx, height, width)."""
+        box_tensor = tf.cast(box_tensor, tf.float32)
+        y_min, x_min, y_max, x_max = tf.split(
+            box_tensor, num_or_size_splits=4, axis=-1
+        )
+        height = y_max - y_min
+        width = x_max - x_min
+        cy = y_min + 0.5 * height
+        cx = x_min + 0.5 * width
+        # Epsilon keeps the later division and log finite for degenerate boxes.
+        epsilon = tf.keras.backend.epsilon()
+        return cy, cx, height + epsilon, width + epsilon
+
+    def call(self, boxes, anchors):
+        """Encodes (`invert=False`) or decodes (`invert=True`) `boxes`.
+
+        # Arguments:
+            boxes: [y_min, x_min, y_max, x_max] boxes when encoding, or
+                [ty, tx, th, tw] encodings when decoding, on the last axis.
+            anchors: [y_min, x_min, y_max, x_max] anchors on the last axis.
+
+        # Returns:
+            A float32 tensor holding the converted boxes on the last axis.
+        """
+        cy_a, cx_a, height_a, width_a = self._corners_to_centroids(anchors)
+
+        if not self.invert:
+            cy_gt, cx_gt, height_gt, width_gt = self._corners_to_centroids(boxes)
+            ty = (cy_gt - cy_a) / height_a
+            tx = (cx_gt - cx_a) / width_a
+            th = tf.math.log(height_gt / height_a)
+            tw = tf.math.log(width_gt / width_a)
+            if self.center_variances is not None:
+                ty = ty / tf.cast(self.center_variances[0], dtype=ty.dtype)
+                tx = tx / tf.cast(self.center_variances[1], dtype=tx.dtype)
+                th = th / tf.cast(self.size_variances[0], dtype=th.dtype)
+                tw = tw / tf.cast(self.size_variances[1], dtype=tw.dtype)
+            return tf.concat([ty, tx, th, tw], axis=-1)
+
+        # Cast like the encode path so non-float32 encodings mix with anchors.
+        boxes = tf.cast(boxes, tf.float32)
+        ty, tx, th, tw = tf.split(boxes, num_or_size_splits=4, axis=-1)
+        if self.center_variances is not None:
+            ty = ty * tf.cast(self.center_variances[0], dtype=ty.dtype)
+            tx = tx * tf.cast(self.center_variances[1], dtype=tx.dtype)
+            th = th * tf.cast(self.size_variances[0], dtype=th.dtype)
+            tw = tw * tf.cast(self.size_variances[1], dtype=tw.dtype)
+        height_gt = tf.math.exp(th) * height_a
+        width_gt = tf.math.exp(tw) * width_a
+        cy_gt = ty * height_a + cy_a
+        cx_gt = tx * width_a + cx_a
+        y_min_gt = cy_gt - 0.5 * height_gt
+        y_max_gt = cy_gt + 0.5 * height_gt
+        x_min_gt = cx_gt - 0.5 * width_gt
+        x_max_gt = cx_gt + 0.5 * width_gt
+        return tf.concat([y_min_gt, x_min_gt, y_max_gt, x_max_gt], axis=-1)
+
+    def get_config(self):
+        """Returns the layer config, including the coder's own arguments."""
+        config = {
+            "center_variances": self.center_variances,
+            "size_variances": self.size_variances,
+            "invert": self.invert,
+        }
+        base_config = super(SSDBoxCoder, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
diff --git a/tests/kerascv/layers/ssd_box_coder_test.py b/tests/kerascv/layers/ssd_box_coder_test.py
@@ -0,0 +1,48 @@
+import numpy as np
+from kerascv.layers.ssd_box_coder import SSDBoxCoder
+
+
+def test_encode_decode_variance():
+    """Round-trips two boxes through encode and decode with variances set."""
+    variances = {"center_variances": [0.5, 1 / 3], "size_variances": [0.25, 0.2]}
+    anchor_arr = np.array([[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]], np.float32)
+    box_arr = np.asarray([[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]], np.float32)
+
+    # Encoding must reproduce the precomputed targets.
+    encoder = SSDBoxCoder(**variances)
+    encoded = encoder(box_arr, anchor_arr)
+    want = np.asarray(
+        [
+            [-1.0, -1.25, -1.62186, -0.911608],
+            [-0.166667, -0.666667, -2.772588, -5.493062],
+        ]
+    )
+    np.testing.assert_allclose(want, encoded, rtol=1e-06, atol=1e-6)
+
+    # Decoding the encodings must recover the original corner boxes.
+    decoded = SSDBoxCoder(invert=True, **variances)(encoded, anchor_arr)
+    np.testing.assert_allclose(box_arr, decoded, rtol=1e-6, atol=1e-6)
+
+
+def test_encode_decode_no_variance():
+    """Round-trips two boxes through encode and decode without variances."""
+    anchor_arr = np.array([[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]], np.float32)
+    box_arr = np.asarray([[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]], np.float32)
+    want = np.asarray(
+        [[-0.5, -0.41666, -0.40546, -0.18232], [-0.08333, -0.22222, -0.69314, -1.0986]]
+    )
+
+    encoded = SSDBoxCoder()(box_arr, anchor_arr)
+    np.testing.assert_allclose(want, encoded, rtol=1e-05, atol=1e-5)
+
+    decoded = SSDBoxCoder(invert=True)(encoded, anchor_arr)
+    np.testing.assert_allclose(box_arr, decoded, rtol=1e-6, atol=1e-6)
+
+
+def test_config_with_custom_name():
+    """A layer rebuilt from its own config keeps the custom name."""
+    original = SSDBoxCoder(
+        name="box_coder", center_variances=[0.1, 0.1], size_variances=[0.2, 0.2]
+    )
+    restored = SSDBoxCoder.from_config(original.get_config())
+    np.testing.assert_equal(restored.name, original.name)