diff --git a/kerascv/layers/ssd_box_coder.py b/kerascv/layers/ssd_box_coder.py
new file mode 100644
index 0000000000..3adb3f8de6
--- /dev/null
+++ b/kerascv/layers/ssd_box_coder.py
@@ -0,0 +1,147 @@
+import tensorflow as tf
+
+
+class SSDBoxCoder(tf.keras.layers.Layer):
+    """Defines a SSDBoxCoder that converts ground_truth_boxes using anchors.
+
+    Mathematically, the encoding result is:
+        ty = (cy_gt - cy_a) / height_a
+        tx = (cx_gt - cx_a) / width_a
+        th = log(height_gt / height_a)
+        tw = log(width_gt / width_a)
+
+    where cx, cy, width, height represents center of width, center of height,
+    width, height respectively, and subscript `gt` represents ground truth box,
+    `a` represents anchor.
+
+    The `boxes` must have the same shape as `anchors`, this is typically the result
+    of assigning `ground_truth_boxes` to anchors based on a certain matching
+    strategy (argmax, bipartite)
+
+    # Attributes:
+        center_variances: The 1-D scaling factor with 2 floats. This is used to
+            represent the variance of center of height and center of width in
+            Gaussian distribution when labeling the ground truth boxes.
+            During encoding, the result [ty, tx] will be divided, i.e., normalized
+            by the variances. During decoding, the result will be multiplied, i.e.,
+            denormalized by the variances. Defaults to `None` where no variance is
+            applied. The SSD paper uses [.1, .1].
+        size_variances: The 1-D scaling factor with 2 floats. This is used to
+            represent the variance of height and width in Gaussian distribution when
+            labeling the ground truth boxes. During encoding, the result [th, tw]
+            will be divided, i.e., normalized by the variances. During decoding, the
+            result will be multiplied, i.e., denormalized by the variances. Defaults
+            to `None` where no variance is applied. The SSD paper uses [.2, .2].
+        invert: Boolean selecting the direction of the conversion. If False, the
+            layer encodes `boxes`, i.e., converts from [y_min, x_min, y_max, x_max]
+            format to [ty, tx, th, tw] format. If True, the layer decodes `boxes`,
+            i.e., converts from [ty, tx, th, tw] format back to
+            [y_min, x_min, y_max, x_max] format. Defaults to `False`.
+
+    # References
+        [Wei Liu et al., 2015](https://arxiv.org/abs/1512.02325)
+    """
+
+    def __init__(
+        self,
+        center_variances=None,
+        size_variances=None,
+        invert=False,
+        name=None,
+        **kwargs
+    ):
+        # The two variances are only meaningful as a pair: reject the case where
+        # exactly one of them is given.
+        if (center_variances is None) != (size_variances is None):
+            raise ValueError(
+                "`center_variances` and `size_variances` should both be None or "
+                "tuple of floats, got {}, {}".format(center_variances, size_variances)
+            )
+        # Initialize the base layer before assigning any attribute on `self`.
+        super(SSDBoxCoder, self).__init__(name=name, **kwargs)
+        self.center_variances = center_variances
+        self.size_variances = size_variances
+        self.invert = invert
+
+    def call(self, boxes, anchors):
+        """Encodes or decodes `boxes` relative to `anchors`.
+
+        # Arguments
+            boxes: Tensor whose last dimension has size 4. Corner boxes
+                [y_min, x_min, y_max, x_max] when encoding (`invert=False`), or
+                encoded boxes [ty, tx, th, tw] when decoding (`invert=True`).
+            anchors: Tensor with the same shape as `boxes`, holding the anchors
+                in [y_min, x_min, y_max, x_max] format.
+
+        # Returns
+            A float32 tensor whose last dimension has size 4: [ty, tx, th, tw]
+            when encoding, [y_min, x_min, y_max, x_max] when decoding.
+        """
+
+        def corner_to_centroids(box_tensor):
+            # Splits corner boxes into center y, center x, height and width.
+            box_tensor = tf.cast(box_tensor, tf.float32)
+            y_min, x_min, y_max, x_max = tf.split(
+                box_tensor, num_or_size_splits=4, axis=-1
+            )
+            height = y_max - y_min
+            width = x_max - x_min
+            cy = y_min + 0.5 * height
+            cx = x_min + 0.5 * width
+            # The epsilon keeps the later division and log finite for boxes with
+            # zero height or width.
+            return (
+                cy,
+                cx,
+                height + tf.keras.backend.epsilon(),
+                width + tf.keras.backend.epsilon(),
+            )
+
+        cy_a, cx_a, height_a, width_a = corner_to_centroids(anchors)
+
+        if not self.invert:
+            cy_gt, cx_gt, height_gt, width_gt = corner_to_centroids(boxes)
+            ty = (cy_gt - cy_a) / height_a
+            tx = (cx_gt - cx_a) / width_a
+            th = tf.math.log(height_gt / height_a)
+            tw = tf.math.log(width_gt / width_a)
+
+            if self.center_variances is not None:
+                ty = ty / tf.cast(self.center_variances[0], dtype=ty.dtype)
+                tx = tx / tf.cast(self.center_variances[1], dtype=tx.dtype)
+                th = th / tf.cast(self.size_variances[0], dtype=th.dtype)
+                tw = tw / tf.cast(self.size_variances[1], dtype=tw.dtype)
+
+            return tf.concat([ty, tx, th, tw], axis=-1)
+
+        else:
+            # Cast as the encoding path does, so that `boxes` of another dtype do
+            # not clash with the float32 anchor terms below.
+            boxes = tf.cast(boxes, tf.float32)
+            ty, tx, th, tw = tf.split(boxes, num_or_size_splits=4, axis=-1)
+            if self.center_variances is not None:
+                ty = ty * tf.cast(self.center_variances[0], dtype=ty.dtype)
+                tx = tx * tf.cast(self.center_variances[1], dtype=tx.dtype)
+                th = th * tf.cast(self.size_variances[0], dtype=th.dtype)
+                tw = tw * tf.cast(self.size_variances[1], dtype=tw.dtype)
+
+            height_gt = tf.math.exp(th) * height_a
+            width_gt = tf.math.exp(tw) * width_a
+            cy_gt = ty * height_a + cy_a
+            cx_gt = tx * width_a + cx_a
+            y_min_gt = cy_gt - 0.5 * height_gt
+            y_max_gt = cy_gt + 0.5 * height_gt
+            x_min_gt = cx_gt - 0.5 * width_gt
+            x_max_gt = cx_gt + 0.5 * width_gt
+
+            return tf.concat([y_min_gt, x_min_gt, y_max_gt, x_max_gt], axis=-1)
+
+    def get_config(self):
+        """Returns the layer config, including the box coder arguments."""
+        config = {
+            "center_variances": self.center_variances,
+            "size_variances": self.size_variances,
+            "invert": self.invert,
+        }
+        base_config = super(SSDBoxCoder, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
diff --git a/tests/kerascv/layers/ssd_box_coder_test.py b/tests/kerascv/layers/ssd_box_coder_test.py
new file mode 100644
index 0000000000..cb9d03b768
--- /dev/null
+++ b/tests/kerascv/layers/ssd_box_coder_test.py
@@ -0,0 +1,54 @@
+import numpy as np
+from kerascv.layers.ssd_box_coder import SSDBoxCoder
+
+
+def test_encode_decode_variance():
+    """Encoding with variances matches known values and decoding inverts it."""
+    gt_boxes = np.asarray([[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]], np.float32)
+    anchors = np.array([[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]], np.float32)
+    encode_layer = SSDBoxCoder(
+        center_variances=[0.5, 1 / 3], size_variances=[0.25, 0.2]
+    )
+    encoded_gt_boxes = encode_layer(gt_boxes, anchors)
+    expected_out = np.asarray(
+        [
+            [-1.0, -1.25, -1.62186, -0.911608],
+            [-0.166667, -0.666667, -2.772588, -5.493062],
+        ]
+    )
+    np.testing.assert_allclose(expected_out, encoded_gt_boxes, rtol=1e-06, atol=1e-6)
+
+    decode_layer = SSDBoxCoder(
+        center_variances=[0.5, 1 / 3], size_variances=[0.25, 0.2], invert=True
+    )
+    decoded_gt_boxes = decode_layer(encoded_gt_boxes, anchors)
+    np.testing.assert_allclose(gt_boxes, decoded_gt_boxes, rtol=1e-6, atol=1e-6)
+
+
+def test_encode_decode_no_variance():
+    """Encoding without variances matches known values and decoding inverts it."""
+    gt_boxes = np.asarray([[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]], np.float32)
+    anchors = np.array([[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]], np.float32)
+    encode_layer = SSDBoxCoder()
+    encoded_gt_boxes = encode_layer(gt_boxes, anchors)
+    expected_out = np.asarray(
+        [[-0.5, -0.41666, -0.40546, -0.18232], [-0.08333, -0.22222, -0.69314, -1.0986]]
+    )
+    np.testing.assert_allclose(expected_out, encoded_gt_boxes, rtol=1e-05, atol=1e-5)
+
+    decode_layer = SSDBoxCoder(invert=True)
+    decoded_gt_boxes = decode_layer(encoded_gt_boxes, anchors)
+    np.testing.assert_allclose(gt_boxes, decoded_gt_boxes, rtol=1e-6, atol=1e-6)
+
+
+def test_config_with_custom_name():
+    """A layer rebuilt from its config keeps the custom name and arguments."""
+    layer = SSDBoxCoder(
+        center_variances=[0.1, 0.1], size_variances=[0.2, 0.2], name="box_coder"
+    )
+    config = layer.get_config()
+    layer_1 = SSDBoxCoder.from_config(config)
+    np.testing.assert_equal(layer_1.name, layer.name)
+    np.testing.assert_equal(list(layer_1.center_variances), [0.1, 0.1])
+    np.testing.assert_equal(list(layer_1.size_variances), [0.2, 0.2])
+    np.testing.assert_equal(layer_1.invert, layer.invert)