Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Object_Detection] Add BoxCoder for SSD and FasterRCNN #4

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 125 additions & 0 deletions kerascv/layers/ssd_box_coder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import tensorflow as tf


class SSDBoxCoder(tf.keras.layers.Layer):
    """Encodes boxes against anchors in SSD format, or decodes them back.

    Mathematically, the encoding result is:
        ty = (cy_gt - cy_a) / height_a
        tx = (cx_gt - cx_a) / width_a
        th = log(height_gt / height_a)
        tw = log(width_gt / width_a)

    where cx, cy, width, height represent the center along the width axis,
    the center along the height axis, the width and the height respectively;
    subscript `gt` denotes the ground truth box and `a` denotes the anchor.

    The `boxes` must have the same shape as `anchors`; this is typically the
    result of assigning `ground_truth_boxes` to anchors based on a certain
    matching strategy (argmax, bipartite).

    # Attributes:
        center_variances: The 1-D scaling factor with 2 floats. This is used
            to represent the variance of center of height and center of width
            in Gaussian distribution when labeling the ground truth boxes.
            During encoding, the result [ty, tx] will be divided, i.e.,
            normalized by the variances. During decoding, the input [ty, tx]
            will be multiplied, i.e., denormalized by the variances. Defaults
            to `None` where no variance is applied. The SSD paper uses
            [.1, .1].
        size_variances: The 1-D scaling factor with 2 floats. This is used to
            represent the variance of height and width in Gaussian
            distribution when labeling the ground truth boxes. During
            encoding, the result [th, tw] will be divided, i.e., normalized by
            the variances. During decoding, the input [th, tw] will be
            multiplied, i.e., denormalized by the variances. Defaults to
            `None` where no variance is applied. The SSD paper uses [.2, .2].
        invert: Boolean selecting the direction of the conversion. If False
            (the default), the layer encodes `boxes`, i.e., converts from
            [y_min, x_min, y_max, x_max] format to [ty, tx, th, tw] format.
            If True, the layer decodes `boxes`, i.e., converts from
            [ty, tx, th, tw] format back to [y_min, x_min, y_max, x_max].

    # Raises:
        ValueError: if exactly one of `center_variances` and `size_variances`
            is provided.

    # References
        [Wei Liu et al., 2015](https://arxiv.org/abs/1512.02325)
    """

    def __init__(
        self,
        center_variances=None,
        size_variances=None,
        invert=False,
        name=None,
        **kwargs
    ):
        # Initialize the base layer before assigning any attribute, as the
        # Keras subclassing convention requires.
        super(SSDBoxCoder, self).__init__(name=name, **kwargs)

        # The two variances are only meaningful as a pair: either both are
        # given or both are omitted.
        if (center_variances is None) != (size_variances is None):
            raise ValueError(
                "`center_variances` and `size_variances` should both be None or "
                "tuple of floats, got {}, {}".format(center_variances, size_variances)
            )
        self.center_variances = center_variances
        self.size_variances = size_variances
        self.invert = invert

    def call(self, boxes, anchors):
        """Encodes or decodes `boxes` relative to `anchors`.

        # Arguments:
            boxes: Tensor whose last axis is split into 4 equal parts. When
                `invert` is False these are the corner coordinates
                [y_min, x_min, y_max, x_max]; when `invert` is True these are
                the encoded values [ty, tx, th, tw].
            anchors: Tensor of corner coordinates
                [y_min, x_min, y_max, x_max] along its last axis.

        # Returns:
            A float32 tensor concatenated along the last axis:
            [ty, tx, th, tw] when encoding, or
            [y_min, x_min, y_max, x_max] when decoding.
        """

        def corner_to_centroids(box_tensor):
            """Converts corner boxes to (cy, cx, height, width) tensors."""
            box_tensor = tf.cast(box_tensor, tf.float32)
            y_min, x_min, y_max, x_max = tf.split(
                box_tensor, num_or_size_splits=4, axis=-1
            )
            height = y_max - y_min
            width = x_max - x_min
            cy = y_min + 0.5 * height
            cx = x_min + 0.5 * width
            # Epsilon keeps the later divisions and logarithms away from zero
            # for degenerate (zero-sized) boxes.
            return (
                cy,
                cx,
                height + tf.keras.backend.epsilon(),
                width + tf.keras.backend.epsilon(),
            )

        cy_a, cx_a, height_a, width_a = corner_to_centroids(anchors)

        if not self.invert:
            # Encode: corner boxes -> offsets relative to the anchors.
            cy_gt, cx_gt, height_gt, width_gt = corner_to_centroids(boxes)
            ty = (cy_gt - cy_a) / height_a
            tx = (cx_gt - cx_a) / width_a
            th = tf.math.log(height_gt / height_a)
            tw = tf.math.log(width_gt / width_a)

            if self.center_variances is not None:
                ty = ty / tf.cast(self.center_variances[0], dtype=ty.dtype)
                tx = tx / tf.cast(self.center_variances[1], dtype=tx.dtype)
                th = th / tf.cast(self.size_variances[0], dtype=th.dtype)
                tw = tw / tf.cast(self.size_variances[1], dtype=tw.dtype)

            return tf.concat([ty, tx, th, tw], axis=-1)

        # Decode: offsets relative to the anchors -> corner boxes.
        # Cast like the encode path does, so the encoded values share the
        # anchors' float32 dtype and the variances are not truncated when the
        # caller passes a non-float32 input.
        boxes = tf.cast(boxes, tf.float32)
        ty, tx, th, tw = tf.split(boxes, num_or_size_splits=4, axis=-1)
        if self.center_variances is not None:
            ty = ty * tf.cast(self.center_variances[0], dtype=ty.dtype)
            tx = tx * tf.cast(self.center_variances[1], dtype=tx.dtype)
            th = th * tf.cast(self.size_variances[0], dtype=th.dtype)
            tw = tw * tf.cast(self.size_variances[1], dtype=tw.dtype)

        height_gt = tf.math.exp(th) * height_a
        width_gt = tf.math.exp(tw) * width_a
        cy_gt = ty * height_a + cy_a
        cx_gt = tx * width_a + cx_a
        y_min_gt = cy_gt - 0.5 * height_gt
        y_max_gt = cy_gt + 0.5 * height_gt
        x_min_gt = cx_gt - 0.5 * width_gt
        x_max_gt = cx_gt + 0.5 * width_gt

        return tf.concat([y_min_gt, x_min_gt, y_max_gt, x_max_gt], axis=-1)

    def get_config(self):
        """Returns the layer config, including the coder's own arguments."""
        config = {
            "center_variances": self.center_variances,
            "size_variances": self.size_variances,
            "invert": self.invert,
        }
        base_config = super(SSDBoxCoder, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
48 changes: 48 additions & 0 deletions tests/kerascv/layers/ssd_box_coder_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import numpy as np
from kerascv.layers.ssd_box_coder import SSDBoxCoder


def test_encode_decode_variance():
    """Checks encoding with variances and that decoding restores the boxes."""
    # Review note (saberkun, Jun 19, 2020): Do we want to test all modes
    # (e.g. graph, eager) of keras?
    # Reply (contributor author): Is that testing util exposed?
    gt_boxes = np.asarray([[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]], np.float32)
    anchors = np.array([[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]], np.float32)

    # Encode the ground truth boxes against the anchors.
    encoder = SSDBoxCoder(center_variances=[0.5, 1 / 3], size_variances=[0.25, 0.2])
    encoded = encoder(gt_boxes, anchors)
    expected_encoding = np.asarray(
        [
            [-1.0, -1.25, -1.62186, -0.911608],
            [-0.166667, -0.666667, -2.772588, -5.493062],
        ]
    )
    np.testing.assert_allclose(expected_encoding, encoded, rtol=1e-06, atol=1e-6)

    # Decoding with the same variances must give back the original boxes.
    decoder = SSDBoxCoder(
        center_variances=[0.5, 1 / 3], size_variances=[0.25, 0.2], invert=True
    )
    decoded = decoder(encoded, anchors)
    np.testing.assert_allclose(gt_boxes, decoded, rtol=1e-6, atol=1e-6)


def test_encode_decode_no_variance():
    """Checks encoding without variances and the decode round trip."""
    gt_boxes = np.asarray([[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]], np.float32)
    anchors = np.array([[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]], np.float32)

    # With no variances configured the raw offsets are returned unscaled.
    encoder = SSDBoxCoder()
    encoded = encoder(gt_boxes, anchors)
    expected_encoding = np.asarray(
        [
            [-0.5, -0.41666, -0.40546, -0.18232],
            [-0.08333, -0.22222, -0.69314, -1.0986],
        ]
    )
    np.testing.assert_allclose(expected_encoding, encoded, rtol=1e-05, atol=1e-5)

    # The inverted layer must recover the original corner boxes.
    decoder = SSDBoxCoder(invert=True)
    decoded = decoder(encoded, anchors)
    np.testing.assert_allclose(gt_boxes, decoded, rtol=1e-6, atol=1e-6)


def test_config_with_custom_name():
    """Checks that a custom layer name survives a config round trip."""
    original = SSDBoxCoder(
        center_variances=[0.1, 0.1], size_variances=[0.2, 0.2], name="box_coder"
    )
    # Rebuild the layer purely from its serialized config.
    restored = SSDBoxCoder.from_config(original.get_config())
    np.testing.assert_equal(restored.name, original.name)