From 0568102788f8bd67a3ea54f450e8589b7bbebf6a Mon Sep 17 00:00:00 2001 From: zakajd Date: Thu, 4 Jun 2020 20:17:57 +0300 Subject: [PATCH] formatting --- .../detection_models/efficientdet.py | 8 +- pytorch_tools/detection_models/retinanet.py | 13 +- pytorch_tools/models/__init__.py | 2 +- pytorch_tools/models/bit_resnet.py | 455 +++++++++--------- pytorch_tools/models/efficientnet.py | 11 +- pytorch_tools/models/hrnet.py | 136 +++--- pytorch_tools/models/resnet.py | 7 +- pytorch_tools/models/tresnet.py | 37 +- pytorch_tools/models/vgg.py | 1 - pytorch_tools/modules/activated_batch_norm.py | 2 +- pytorch_tools/modules/tf_same_ops.py | 12 +- pytorch_tools/utils/box.py | 117 ++--- pytorch_tools/utils/misc.py | 9 +- tests/detection_models/test_det_models.py | 3 +- tests/losses/test_losses.py | 11 +- tests/models/test_models.py | 21 +- tests/models/test_weights.py | 2 + tests/modules/test_modules.py | 2 + tests/segmentation_models/test_segm_models.py | 11 +- tests/utils/test_utils.py | 24 +- 20 files changed, 473 insertions(+), 411 deletions(-) diff --git a/pytorch_tools/detection_models/efficientdet.py b/pytorch_tools/detection_models/efficientdet.py index e765f97..0446b56 100644 --- a/pytorch_tools/detection_models/efficientdet.py +++ b/pytorch_tools/detection_models/efficientdet.py @@ -161,16 +161,14 @@ def predict(self, x): """ class_outputs, box_outputs = self.forward(x) anchors = box_utils.generate_anchors_boxes(x.shape[-2:])[0] - return box_utils.decode( - class_outputs, box_outputs, anchors, #img_shape=x.shape[-2:] - ) + return box_utils.decode(class_outputs, box_outputs, anchors) def _initialize_weights(self): # init everything except encoder no_encoder_m = [m for n, m in self.named_modules() if not "encoder" in n] initialize_iterator(no_encoder_m) - # need to init last bias so that after sigmoid it's 0.01 - cls_bias_init = -torch.log(torch.tensor((1 - 0.01) / 0.01)) # -4.59 + # need to init last bias so that after sigmoid it's 0.01 + cls_bias_init = -torch.log(torch.tensor((1 - 0.01) / 0.01)) # -4.59 nn.init.constant_(self.cls_head_convs[-1][1].bias, cls_bias_init) diff --git a/pytorch_tools/detection_models/retinanet.py b/pytorch_tools/detection_models/retinanet.py index 0ddf559..2fbdf74 100644 --- a/pytorch_tools/detection_models/retinanet.py +++ b/pytorch_tools/detection_models/retinanet.py @@ -44,7 +44,7 @@ class RetinaNet(nn.Module): def __init__( self, - pretrained="coco", # not used here for proper signature + pretrained="coco", # not used here for proper signature encoder_name="resnet50", encoder_weights="imagenet", pyramid_channels=256, @@ -90,7 +90,7 @@ def make_final_convs(): self.box_convs = make_final_convs() self.box_head_conv = conv3x3(pyramid_channels, 4 * anchors_per_location, bias=True) self.num_classes = num_classes - self. 
_initialize_weights() + self._initialize_weights() # Name from mmdetectin for convenience def extract_features(self, x): @@ -126,18 +126,17 @@ def predict(self, x): """Run forward on given images and decode raw prediction into bboxes""" class_outputs, box_outputs = self.forward(x) anchors = box_utils.generate_anchors_boxes(x.shape[-2:])[0] - return box_utils.decode( - class_outputs, box_outputs, anchors, img_shape=x.shape[-2:] - ) + return box_utils.decode(class_outputs, box_outputs, anchors) def _initialize_weights(self): # init everything except encoder no_encoder_m = [m for n, m in self.named_modules() if not "encoder" in n] initialize_iterator(no_encoder_m) - # need to init last bias so that after sigmoid it's 0.01 - cls_bias_init = -torch.log(torch.tensor((1 - 0.01) / 0.01)) # -4.59 + # need to init last bias so that after sigmoid it's 0.01 + cls_bias_init = -torch.log(torch.tensor((1 - 0.01) / 0.01)) # -4.59 nn.init.constant_(self.cls_head_conv.bias, cls_bias_init) + # Don't really know input size for the models. 512 is just a guess PRETRAIN_SETTINGS = {**DEFAULT_IMAGENET_SETTINGS, "input_size": (512, 512), "crop_pct": 1, "num_classes": 80} diff --git a/pytorch_tools/models/__init__.py b/pytorch_tools/models/__init__.py index ef9c0d4..4fb855e 100644 --- a/pytorch_tools/models/__init__.py +++ b/pytorch_tools/models/__init__.py @@ -51,4 +51,4 @@ from .bit_resnet import bit_m_101x1 from .bit_resnet import bit_m_101x3 from .bit_resnet import bit_m_152x2 -from .bit_resnet import bit_m_152x4 \ No newline at end of file +from .bit_resnet import bit_m_152x4 diff --git a/pytorch_tools/models/bit_resnet.py b/pytorch_tools/models/bit_resnet.py index 3108e97..c5acbd5 100644 --- a/pytorch_tools/models/bit_resnet.py +++ b/pytorch_tools/models/bit_resnet.py @@ -4,7 +4,7 @@ # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -29,179 +29,181 @@ def conv3x3(cin, cout, stride=1, groups=1, bias=False): - return StdConv2d(cin, cout, kernel_size=3, stride=stride, - padding=1, bias=bias, groups=groups) + return StdConv2d(cin, cout, kernel_size=3, stride=stride, padding=1, bias=bias, groups=groups) def conv1x1(cin, cout, stride=1, bias=False): - return StdConv2d(cin, cout, kernel_size=1, stride=stride, - padding=0, bias=bias) + return StdConv2d(cin, cout, kernel_size=1, stride=stride, padding=0, bias=bias) def tf2th(conv_weights): - """Possibly convert HWIO to OIHW.""" - if conv_weights.ndim == 4: - conv_weights = conv_weights.transpose([3, 2, 0, 1]) - return torch.from_numpy(conv_weights) + """Possibly convert HWIO to OIHW.""" + if conv_weights.ndim == 4: + conv_weights = conv_weights.transpose([3, 2, 0, 1]) + return torch.from_numpy(conv_weights) class PreActBottleneck(nn.Module): - """Pre-activation (v2) bottleneck block. - - Follows the implementation of "Identity Mappings in Deep Residual Networks": - https://github.com/KaimingHe/resnet-1k-layers/blob/master/resnet-pre-act.lua - - Except it puts the stride on 3x3 conv when available. 
- """ - - def __init__(self, cin, cout=None, cmid=None, stride=1): - super().__init__() - cout = cout or cin - cmid = cmid or cout//4 - - self.gn1 = nn.GroupNorm(32, cin) - self.conv1 = conv1x1(cin, cmid) - self.gn2 = nn.GroupNorm(32, cmid) - self.conv2 = conv3x3(cmid, cmid, stride) # Original code has it on conv1!! - self.gn3 = nn.GroupNorm(32, cmid) - self.conv3 = conv1x1(cmid, cout) - self.relu = nn.ReLU(inplace=True) - - if (stride != 1 or cin != cout): - # Projection also with pre-activation according to paper. - self.downsample = conv1x1(cin, cout, stride) - - def forward(self, x): - out = self.relu(self.gn1(x)) - - # Residual branch - residual = x - if hasattr(self, 'downsample'): - residual = self.downsample(out) - - # Unit's branch - out = self.conv1(out) - out = self.conv2(self.relu(self.gn2(out))) - out = self.conv3(self.relu(self.gn3(out))) - - return out + residual - - def load_from(self, weights, prefix=''): - convname = 'standardized_conv2d' - with torch.no_grad(): - self.conv1.weight.copy_(tf2th(weights[f'{prefix}a/{convname}/kernel'])) - self.conv2.weight.copy_(tf2th(weights[f'{prefix}b/{convname}/kernel'])) - self.conv3.weight.copy_(tf2th(weights[f'{prefix}c/{convname}/kernel'])) - self.gn1.weight.copy_(tf2th(weights[f'{prefix}a/group_norm/gamma'])) - self.gn2.weight.copy_(tf2th(weights[f'{prefix}b/group_norm/gamma'])) - self.gn3.weight.copy_(tf2th(weights[f'{prefix}c/group_norm/gamma'])) - self.gn1.bias.copy_(tf2th(weights[f'{prefix}a/group_norm/beta'])) - self.gn2.bias.copy_(tf2th(weights[f'{prefix}b/group_norm/beta'])) - self.gn3.bias.copy_(tf2th(weights[f'{prefix}c/group_norm/beta'])) - if hasattr(self, 'downsample'): - w = weights[f'{prefix}a/proj/{convname}/kernel'] - self.downsample.weight.copy_(tf2th(w)) + """Pre-activation (v2) bottleneck block. + + Follows the implementation of "Identity Mappings in Deep Residual Networks": + https://github.com/KaimingHe/resnet-1k-layers/blob/master/resnet-pre-act.lua + + Except it puts the stride on 3x3 conv when available. + """ + + def __init__(self, cin, cout=None, cmid=None, stride=1): + super().__init__() + cout = cout or cin + cmid = cmid or cout // 4 + + self.gn1 = nn.GroupNorm(32, cin) + self.conv1 = conv1x1(cin, cmid) + self.gn2 = nn.GroupNorm(32, cmid) + self.conv2 = conv3x3(cmid, cmid, stride) # Original code has it on conv1!! + self.gn3 = nn.GroupNorm(32, cmid) + self.conv3 = conv1x1(cmid, cout) + self.relu = nn.ReLU(inplace=True) + + if stride != 1 or cin != cout: + # Projection also with pre-activation according to paper. 
+ self.downsample = conv1x1(cin, cout, stride) + + def forward(self, x): + out = self.relu(self.gn1(x)) + + # Residual branch + residual = x + if hasattr(self, "downsample"): + residual = self.downsample(out) + + # Unit's branch + out = self.conv1(out) + out = self.conv2(self.relu(self.gn2(out))) + out = self.conv3(self.relu(self.gn3(out))) + + return out + residual + + def load_from(self, weights, prefix=""): + convname = "standardized_conv2d" + with torch.no_grad(): + self.conv1.weight.copy_(tf2th(weights[f"{prefix}a/{convname}/kernel"])) + self.conv2.weight.copy_(tf2th(weights[f"{prefix}b/{convname}/kernel"])) + self.conv3.weight.copy_(tf2th(weights[f"{prefix}c/{convname}/kernel"])) + self.gn1.weight.copy_(tf2th(weights[f"{prefix}a/group_norm/gamma"])) + self.gn2.weight.copy_(tf2th(weights[f"{prefix}b/group_norm/gamma"])) + self.gn3.weight.copy_(tf2th(weights[f"{prefix}c/group_norm/gamma"])) + self.gn1.bias.copy_(tf2th(weights[f"{prefix}a/group_norm/beta"])) + self.gn2.bias.copy_(tf2th(weights[f"{prefix}b/group_norm/beta"])) + self.gn3.bias.copy_(tf2th(weights[f"{prefix}c/group_norm/beta"])) + if hasattr(self, "downsample"): + w = weights[f"{prefix}a/proj/{convname}/kernel"] + self.downsample.weight.copy_(tf2th(w)) + # this models are designed for trasfer learning only! not for training from scratch class ResNetV2(nn.Module): - """ - Implementation of Pre-activation (v2) ResNet mode. - Used to create Bit-M-50/101/152x1/2/3/4 models - - Args: - num_classes (int): Number of classification classes. Defaults to 5 - """ - - def __init__( - self, - block_units, - width_factor, - # in_channels=3, # TODO: add later - num_classes=5, # just a random number - # encoder=False, # TODO: add later + """ + Implementation of Pre-activation (v2) ResNet mode. + Used to create Bit-M-50/101/152x1/2/3/4 models + + Args: + num_classes (int): Number of classification classes. Defaults to 5 + """ + + def __init__( + self, + block_units, + width_factor, + # in_channels=3, # TODO: add later + num_classes=5, # just a random number + # encoder=False, # TODO: add later ): - super().__init__() - wf = width_factor # shortcut 'cause we'll use it a lot. - - # The following will be unreadable if we split lines. - # pylint: disable=line-too-long - self.root = nn.Sequential(OrderedDict([ - ('conv', StdConv2d(3, 64*wf, kernel_size=7, stride=2, padding=3, bias=False)), - ('pad', nn.ConstantPad2d(1, 0)), - ('pool', nn.MaxPool2d(kernel_size=3, stride=2, padding=0)), - # The following is subtly not the same! 
- # ('pool', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)), - ])) - - self.body = nn.Sequential(OrderedDict([ - ('block1', nn.Sequential(OrderedDict( - [('unit01', PreActBottleneck(cin=64*wf, cout=256*wf, cmid=64*wf))] + - [(f'unit{i:02d}', PreActBottleneck(cin=256*wf, cout=256*wf, cmid=64*wf)) for i in range(2, block_units[0] + 1)], - ))), - ('block2', nn.Sequential(OrderedDict( - [('unit01', PreActBottleneck(cin=256*wf, cout=512*wf, cmid=128*wf, stride=2))] + - [(f'unit{i:02d}', PreActBottleneck(cin=512*wf, cout=512*wf, cmid=128*wf)) for i in range(2, block_units[1] + 1)], - ))), - ('block3', nn.Sequential(OrderedDict( - [('unit01', PreActBottleneck(cin=512*wf, cout=1024*wf, cmid=256*wf, stride=2))] + - [(f'unit{i:02d}', PreActBottleneck(cin=1024*wf, cout=1024*wf, cmid=256*wf)) for i in range(2, block_units[2] + 1)], - ))), - ('block4', nn.Sequential(OrderedDict( - [('unit01', PreActBottleneck(cin=1024*wf, cout=2048*wf, cmid=512*wf, stride=2))] + - [(f'unit{i:02d}', PreActBottleneck(cin=2048*wf, cout=2048*wf, cmid=512*wf)) for i in range(2, block_units[3] + 1)], - ))), - ])) - # pylint: enable=line-too-long - - self.head = nn.Sequential(OrderedDict([ - ('gn', nn.GroupNorm(32, 2048*wf)), - ('relu', nn.ReLU(inplace=True)), - ('avg', nn.AdaptiveAvgPool2d(output_size=1)), - ('conv', nn.Conv2d(2048*wf, num_classes, kernel_size=1, bias=True)), - ])) - - def features(self, x): - return self.body(self.root(x)) - - def logits(self, x): - return self.head(x) - - def forward(self, x): - x = self.logits(self.features(x)) - assert x.shape[-2:] == (1, 1) # We should have no spatial shape left. - return x[...,0,0] - - def load_from(self, weights, prefix='resnet/'): - with torch.no_grad(): - self.root.conv.weight.copy_(tf2th(weights[f'{prefix}root_block/standardized_conv2d/kernel'])) # pylint: disable=line-too-long - self.head.gn.weight.copy_(tf2th(weights[f'{prefix}group_norm/gamma'])) - self.head.gn.bias.copy_(tf2th(weights[f'{prefix}group_norm/beta'])) - # always zero_head - nn.init.zeros_(self.head.conv.weight) - nn.init.zeros_(self.head.conv.bias) - - for bname, block in self.body.named_children(): - for uname, unit in block.named_children(): - unit.load_from(weights, prefix=f'{prefix}{bname}/{uname}/') - - - - -KNOWN_MODELS = OrderedDict([ - ('BiT-M-R50x1', lambda *a, **kw: ResNetV2([3, 4, 6, 3], 1, *a, **kw)), - ('BiT-M-R50x3', lambda *a, **kw: ResNetV2([3, 4, 6, 3], 3, *a, **kw)), - ('BiT-M-R101x1', lambda *a, **kw: ResNetV2([3, 4, 23, 3], 1, *a, **kw)), - ('BiT-M-R101x3', lambda *a, **kw: ResNetV2([3, 4, 23, 3], 3, *a, **kw)), - ('BiT-M-R152x2', lambda *a, **kw: ResNetV2([3, 8, 36, 3], 2, *a, **kw)), - ('BiT-M-R152x4', lambda *a, **kw: ResNetV2([3, 8, 36, 3], 4, *a, **kw)), - - ('BiT-S-R50x1', lambda *a, **kw: ResNetV2([3, 4, 6, 3], 1, *a, **kw)), - ('BiT-S-R50x3', lambda *a, **kw: ResNetV2([3, 4, 6, 3], 3, *a, **kw)), - ('BiT-S-R101x1', lambda *a, **kw: ResNetV2([3, 4, 23, 3], 1, *a, **kw)), - ('BiT-S-R101x3', lambda *a, **kw: ResNetV2([3, 4, 23, 3], 3, *a, **kw)), - ('BiT-S-R152x2', lambda *a, **kw: ResNetV2([3, 8, 36, 3], 2, *a, **kw)), - ('BiT-S-R152x4', lambda *a, **kw: ResNetV2([3, 8, 36, 3], 4, *a, **kw)), -]) + super().__init__() + wf = width_factor # shortcut 'cause we'll use it a lot. + + # The following will be unreadable if we split lines. 
+ # pylint: disable=line-too-long + # fmt: off + self.root = nn.Sequential(OrderedDict([ + ('conv', StdConv2d(3, 64*wf, kernel_size=7, stride=2, padding=3, bias=False)), + ('pad', nn.ConstantPad2d(1, 0)), + ('pool', nn.MaxPool2d(kernel_size=3, stride=2, padding=0)), + # The following is subtly not the same! + # ('pool', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)), + ])) + + self.body = nn.Sequential(OrderedDict([ + ('block1', nn.Sequential(OrderedDict( + [('unit01', PreActBottleneck(cin=64*wf, cout=256*wf, cmid=64*wf))] + + [(f'unit{i:02d}', PreActBottleneck(cin=256*wf, cout=256*wf, cmid=64*wf)) for i in range(2, block_units[0] + 1)], + ))), + ('block2', nn.Sequential(OrderedDict( + [('unit01', PreActBottleneck(cin=256*wf, cout=512*wf, cmid=128*wf, stride=2))] + + [(f'unit{i:02d}', PreActBottleneck(cin=512*wf, cout=512*wf, cmid=128*wf)) for i in range(2, block_units[1] + 1)], + ))), + ('block3', nn.Sequential(OrderedDict( + [('unit01', PreActBottleneck(cin=512*wf, cout=1024*wf, cmid=256*wf, stride=2))] + + [(f'unit{i:02d}', PreActBottleneck(cin=1024*wf, cout=1024*wf, cmid=256*wf)) for i in range(2, block_units[2] + 1)], + ))), + ('block4', nn.Sequential(OrderedDict( + [('unit01', PreActBottleneck(cin=1024*wf, cout=2048*wf, cmid=512*wf, stride=2))] + + [(f'unit{i:02d}', PreActBottleneck(cin=2048*wf, cout=2048*wf, cmid=512*wf)) for i in range(2, block_units[3] + 1)], + ))), + ])) + # pylint: enable=line-too-long + + self.head = nn.Sequential(OrderedDict([ + ('gn', nn.GroupNorm(32, 2048*wf)), + ('relu', nn.ReLU(inplace=True)), + ('avg', nn.AdaptiveAvgPool2d(output_size=1)), + ('conv', nn.Conv2d(2048*wf, num_classes, kernel_size=1, bias=True)), + ])) + # fmt: on + + def features(self, x): + return self.body(self.root(x)) + + def logits(self, x): + return self.head(x) + + def forward(self, x): + x = self.logits(self.features(x)) + assert x.shape[-2:] == (1, 1) # We should have no spatial shape left. 
+ return x[..., 0, 0] + + def load_from(self, weights, prefix="resnet/"): + with torch.no_grad(): + self.root.conv.weight.copy_( + tf2th(weights[f"{prefix}root_block/standardized_conv2d/kernel"]) + ) # pylint: disable=line-too-long + self.head.gn.weight.copy_(tf2th(weights[f"{prefix}group_norm/gamma"])) + self.head.gn.bias.copy_(tf2th(weights[f"{prefix}group_norm/beta"])) + # always zero_head + nn.init.zeros_(self.head.conv.weight) + nn.init.zeros_(self.head.conv.bias) + + for bname, block in self.body.named_children(): + for uname, unit in block.named_children(): + unit.load_from(weights, prefix=f"{prefix}{bname}/{uname}/") + + +KNOWN_MODELS = OrderedDict( + [ + ("BiT-M-R50x1", lambda *a, **kw: ResNetV2([3, 4, 6, 3], 1, *a, **kw)), + ("BiT-M-R50x3", lambda *a, **kw: ResNetV2([3, 4, 6, 3], 3, *a, **kw)), + ("BiT-M-R101x1", lambda *a, **kw: ResNetV2([3, 4, 23, 3], 1, *a, **kw)), + ("BiT-M-R101x3", lambda *a, **kw: ResNetV2([3, 4, 23, 3], 3, *a, **kw)), + ("BiT-M-R152x2", lambda *a, **kw: ResNetV2([3, 8, 36, 3], 2, *a, **kw)), + ("BiT-M-R152x4", lambda *a, **kw: ResNetV2([3, 8, 36, 3], 4, *a, **kw)), + ("BiT-S-R50x1", lambda *a, **kw: ResNetV2([3, 4, 6, 3], 1, *a, **kw)), + ("BiT-S-R50x3", lambda *a, **kw: ResNetV2([3, 4, 6, 3], 3, *a, **kw)), + ("BiT-S-R101x1", lambda *a, **kw: ResNetV2([3, 4, 23, 3], 1, *a, **kw)), + ("BiT-S-R101x3", lambda *a, **kw: ResNetV2([3, 4, 23, 3], 3, *a, **kw)), + ("BiT-S-R152x2", lambda *a, **kw: ResNetV2([3, 8, 36, 3], 2, *a, **kw)), + ("BiT-S-R152x4", lambda *a, **kw: ResNetV2([3, 8, 36, 3], 4, *a, **kw)), + ] +) PRETRAIN_SETTINGS = { @@ -215,96 +217,99 @@ def load_from(self, weights, prefix='resnet/'): # fmt: off CFGS = { - # weights are loaded by default - "bit_m_50x1": { - "default": { - "params": {"block_units": [3, 4, 6, 3], "width_factor": 1}, - "url": "https://storage.googleapis.com/bit_models/BiT-M-R50x1.npz", - **PRETRAIN_SETTINGS + # weights are loaded by default + "bit_m_50x1": { + "default": { + "params": {"block_units": [3, 4, 6, 3], "width_factor": 1}, + "url": "https://storage.googleapis.com/bit_models/BiT-M-R50x1.npz", + **PRETRAIN_SETTINGS + }, }, - }, - "bit_m_50x3": { - "default": { - "params": {"block_units": [3, 4, 6, 3], "width_factor": 3}, - "url": "https://storage.googleapis.com/bit_models/BiT-M-R50x3.npz", - **PRETRAIN_SETTINGS, + "bit_m_50x3": { + "default": { + "params": {"block_units": [3, 4, 6, 3], "width_factor": 3}, + "url": "https://storage.googleapis.com/bit_models/BiT-M-R50x3.npz", + **PRETRAIN_SETTINGS, + }, }, - }, - "bit_m_101x1": { - "default": { - "params": {"block_units": [3, 4, 23, 3], "width_factor": 1}, - "url": "https://storage.googleapis.com/bit_models/BiT-M-R101x1.npz", - **PRETRAIN_SETTINGS, + "bit_m_101x1": { + "default": { + "params": {"block_units": [3, 4, 23, 3], "width_factor": 1}, + "url": "https://storage.googleapis.com/bit_models/BiT-M-R101x1.npz", + **PRETRAIN_SETTINGS, + }, }, - }, - "bit_m_101x3": { - "default": { - "params": {"block_units": [3, 4, 23, 3], "width_factor": 3}, - "url": "https://storage.googleapis.com/bit_models/BiT-M-R101x3.npz", - **PRETRAIN_SETTINGS, + "bit_m_101x3": { + "default": { + "params": {"block_units": [3, 4, 23, 3], "width_factor": 3}, + "url": "https://storage.googleapis.com/bit_models/BiT-M-R101x3.npz", + **PRETRAIN_SETTINGS, + }, }, - }, - "bit_m_152x2": { - "default": { - "params": {"block_units": [3, 8, 36, 3], "width_factor": 2}, - "url": "https://storage.googleapis.com/bit_models/BiT-M-R152x2.npz", - **PRETRAIN_SETTINGS, + "bit_m_152x2": { + "default": { + "params": 
{"block_units": [3, 8, 36, 3], "width_factor": 2}, + "url": "https://storage.googleapis.com/bit_models/BiT-M-R152x2.npz", + **PRETRAIN_SETTINGS, + }, }, - }, - "bit_m_152x4": { - "default": { - "params": {"block_units": [3, 8, 36, 3], "width_factor": 4}, - "url": "https://storage.googleapis.com/bit_models/BiT-M-R152x4.npz", - **PRETRAIN_SETTINGS + "bit_m_152x4": { + "default": { + "params": {"block_units": [3, 8, 36, 3], "width_factor": 4}, + "url": "https://storage.googleapis.com/bit_models/BiT-M-R152x4.npz", + **PRETRAIN_SETTINGS + }, }, - }, } # fmt: on def _bit_resnet(arch, pretrained=None, **kwargs): - cfgs = deepcopy(CFGS) - cfg_settings = cfgs[arch]["default"] - cfg_params = cfg_settings.pop("params") - cfg_url = cfg_settings.pop("url") - kwargs.pop("pretrained", None) - kwargs.update(cfg_params) - model = ResNetV2(**kwargs) - # load weights to torch checkpoints folder - try: - torch.hub.load_state_dict_from_url(cfg_url) - except RuntimeError: - pass # to avoid RuntimeError: Only one file(not dir) is allowed in the zipfile - filename = os.path.basename(urlparse(cfg_url).path) - torch_home = torch.hub._get_torch_home() - cached_file = os.path.join(torch_home, 'checkpoints', filename) - weights = np.load(cached_file) - model.load_from(weights) - return model + cfgs = deepcopy(CFGS) + cfg_settings = cfgs[arch]["default"] + cfg_params = cfg_settings.pop("params") + cfg_url = cfg_settings.pop("url") + kwargs.pop("pretrained", None) + kwargs.update(cfg_params) + model = ResNetV2(**kwargs) + # load weights to torch checkpoints folder + try: + torch.hub.load_state_dict_from_url(cfg_url) + except RuntimeError: + pass # to avoid RuntimeError: Only one file(not dir) is allowed in the zipfile + filename = os.path.basename(urlparse(cfg_url).path) + torch_home = torch.hub._get_torch_home() + cached_file = os.path.join(torch_home, "checkpoints", filename) + weights = np.load(cached_file) + model.load_from(weights) + return model + # only want M versions of models for fine-tuning @wraps(ResNetV2) def bit_m_50x1(**kwargs): - return _bit_resnet("bit_m_50x1", **kwargs) + return _bit_resnet("bit_m_50x1", **kwargs) + @wraps(ResNetV2) def bit_m_50x3(**kwargs): - return _bit_resnet("bit_m_50x3", **kwargs) + return _bit_resnet("bit_m_50x3", **kwargs) + @wraps(ResNetV2) def bit_m_101x1(**kwargs): - return _bit_resnet("bit_m_101x1", **kwargs) + return _bit_resnet("bit_m_101x1", **kwargs) + @wraps(ResNetV2) def bit_m_101x3(**kwargs): - return _bit_resnet("bit_m_101x3", **kwargs) + return _bit_resnet("bit_m_101x3", **kwargs) + @wraps(ResNetV2) def bit_m_152x2(**kwargs): - return _bit_resnet("bit_m_152x2", **kwargs) + return _bit_resnet("bit_m_152x2", **kwargs) + @wraps(ResNetV2) def bit_m_152x4(**kwargs): - return _bit_resnet("bit_m_152x4", **kwargs) - - - + return _bit_resnet("bit_m_152x4", **kwargs) diff --git a/pytorch_tools/models/efficientnet.py b/pytorch_tools/models/efficientnet.py index ca5e337..17e67c0 100644 --- a/pytorch_tools/models/efficientnet.py +++ b/pytorch_tools/models/efficientnet.py @@ -144,7 +144,7 @@ def __init__( self.dropout = nn.Dropout(drop_rate, inplace=True) self.classifier = nn.Linear(num_features, num_classes) - patch_bn(self) # adjust epsilon + patch_bn(self) # adjust epsilon initialize(self) if match_tf_same_padding: conv_to_same_conv(self) @@ -397,7 +397,8 @@ def patch_bn(module): module.eps = 1e-3 for m in module.children(): patch_bn(m) - + + def _efficientnet(arch, pretrained=None, **kwargs): cfgs = deepcopy(CFGS) cfg_settings = cfgs[arch]["default"] @@ -426,8 +427,10 @@ def 
_efficientnet(arch, pretrained=None, **kwargs): ) state_dict["classifier.weight"] = model.state_dict()["classifier.weight"] state_dict["classifier.bias"] = model.state_dict()["classifier.bias"] - if kwargs.get("in_channels", 3) != 3: # support pretrained for custom input channels - state_dict["conv_stem.weight"] = repeat_channels(state_dict["conv_stem.weight"], kwargs["in_channels"]) + if kwargs.get("in_channels", 3) != 3: # support pretrained for custom input channels + state_dict["conv_stem.weight"] = repeat_channels( + state_dict["conv_stem.weight"], kwargs["in_channels"] + ) model.load_state_dict(state_dict) setattr(model, "pretrained_settings", cfg_settings) return model diff --git a/pytorch_tools/models/hrnet.py b/pytorch_tools/models/hrnet.py index 0a65182..cbf5817 100644 --- a/pytorch_tools/models/hrnet.py +++ b/pytorch_tools/models/hrnet.py @@ -43,22 +43,23 @@ def make_layer(inplanes, planes, blocks, norm_layer=ABN, norm_act="relu"): layers = [] layers.append(block(inplanes, planes, downsample=downsample, **bn_args)) inplanes = planes * block.expansion - for i in range(1, blocks): + for _ in range(1, blocks): layers.append(block(inplanes, planes, **bn_args)) return nn.Sequential(*layers) + class HighResolutionModule(nn.Module): def __init__( - self, - num_branches, # number of parallel branches - num_blocks, # number of blocks + self, + num_branches, # number of parallel branches + num_blocks, # number of blocks num_channels, norm_layer=ABN, norm_act="relu", ): super(HighResolutionModule, self).__init__() self.block = BasicBlock - self.num_branches = num_branches # used in forward + self.num_branches = num_branches # used in forward self.num_inchannels = num_channels self.bn_args = {"norm_layer": norm_layer, "norm_act": norm_act} branches = [self._make_branch(n_bl, n_ch) for n_bl, n_ch in zip(num_blocks, num_channels)] @@ -69,6 +70,7 @@ def __init__( def _make_branch(self, b_blocks, b_channels): return nn.Sequential(*[self.block(b_channels, b_channels, **self.bn_args) for _ in range(b_blocks)]) + # fmt: off # don't want to rewrite this piece it's too fragile def _make_fuse_layers(self, norm_layer, norm_act): if self.num_branches == 1: @@ -104,23 +106,24 @@ def _make_fuse_layers(self, norm_layer, norm_act): fuse_layers.append(nn.ModuleList(fuse_layer)) return nn.ModuleList(fuse_layers) - + # fmt: on def forward(self, x): if self.num_branches == 1: return [self.branches[0](x[0])] - + x = [branch(x_i) for branch, x_i in zip(self.branches, x)] x_fuse = [] for i in range(len(self.fuse_layers)): y = x[0] if i == 0 else self.fuse_layers[i][0](x[0]) for j in range(1, self.num_branches): - y = y + self.fuse_layers[i][j](x[j]) + y = y + self.fuse_layers[i][j](x[j]) x_fuse.append(self.relu(y)) return x_fuse + class TransitionBlock(nn.Module): """Transition is where new branches for smaller resolution are born -- ==> -- @@ -129,7 +132,7 @@ class TransitionBlock(nn.Module): \ \=> -- """ - + def __init__(self, prev_channels, current_channels, norm_layer=ABN, norm_act="relu"): super().__init__() transition_layers = [] @@ -140,40 +143,40 @@ def __init__(self, prev_channels, current_channels, norm_layer=ABN, norm_act="re transition_layers.append(nn.Sequential(*layers)) else: transition_layers.append(nn.Identity()) - - if len(current_channels) > len(prev_channels): # only works for ONE extra branch + + if len(current_channels) > len(prev_channels): # only works for ONE extra branch layers = [ - conv3x3(prev_channels[-1], current_channels[-1], 2), - norm_layer(current_channels[-1], 
activation=norm_act) + conv3x3(prev_channels[-1], current_channels[-1], 2), + norm_layer(current_channels[-1], activation=norm_act), ] transition_layers.append(nn.Sequential(*layers)) self.trans_layers = nn.ModuleList(transition_layers) - - def forward(self, x): # x is actually an array + + def forward(self, x): # x is actually an array out_x = [trans_l(x_i) for x_i, trans_l in zip(x, self.trans_layers)] out_x.append(self.trans_layers[-1](x[-1])) return out_x + class HRClassificationHead(nn.Module): def __init__(self, pre_channels, norm_layer=ABN, norm_act="relu"): super().__init__() head_block = Bottleneck head_channels = [32, 64, 128, 256] - # Increasing the #channels on each resolution + # Increasing the #channels on each resolution # from C, 2C, 4C, 8C to 128, 256, 512, 1024 incre_modules = [] for (pre_c, head_c) in zip(pre_channels, head_channels): incre_modules.append(make_layer(pre_c, head_c, 1, norm_layer, norm_act)) self.incre_modules = nn.ModuleList(incre_modules) - + # downsampling modules downsamp_modules = [] - for i in range(len(pre_channels)-1): + for i in range(len(pre_channels) - 1): in_ch = head_channels[i] * head_block.expansion - out_ch = head_channels[i+1] * head_block.expansion + out_ch = head_channels[i + 1] * head_block.expansion downsamp_module = nn.Sequential( - conv3x3(in_ch, out_ch, 2, bias=True), - norm_layer(out_ch, activation=norm_act) + conv3x3(in_ch, out_ch, 2, bias=True), norm_layer(out_ch, activation=norm_act) ) downsamp_modules.append(downsamp_module) self.downsamp_modules = nn.ModuleList(downsamp_modules) @@ -182,13 +185,13 @@ def __init__(self, pre_channels, norm_layer=ABN, norm_act="relu"): conv1x1(head_channels[3] * head_block.expansion, 2048, bias=True), norm_layer(2048, activation=norm_act), ) - + def forward(self, x): - x = [self.incre_modules[i](x[i]) for i in range(4)] + x = [self.incre_modules[i](x[i]) for i in range(4)] for i in range(1, 4): - x[i] = x[i] + self.downsamp_modules[i-1](x[i-1]) + x[i] = x[i] + self.downsamp_modules[i - 1](x[i - 1]) return self.final_layer(x[3]) - + class HighResolutionNet(nn.Module): """HighResolution Nets constructor @@ -219,13 +222,14 @@ class HighResolutionNet(nn.Module): NOTE: HRNet first features have resolution 4x times smaller than input, not 2x as all other models. So it CAN'T be used as encoder in Unet and Linknet models """ - # drop_rate (float): - # Dropout probability before classifier, for training. Defaults to 0. + + # drop_rate (float): + # Dropout probability before classifier, for training. Defaults to 0. def __init__( - self, + self, width=18, small=False, - pretrained=None, # not used. here for proper signature + pretrained=None, # not used. 
here for proper signature num_classes=1000, in_channels=3, norm_layer="abn", @@ -241,27 +245,25 @@ def __init__( self.conv2 = conv3x3(stem_width, stem_width, stride=2) self.bn2 = norm_layer(stem_width, activation=norm_act) - + channels = [width, width * 2, width * 4, width * 8] n_blocks = [2 if small else 4] * 4 - + self.layer1 = make_layer(stem_width, stem_width, n_blocks[0], **bn_args) - + self.transition1 = TransitionBlock([stem_width * Bottleneck.expansion], channels[:2], **bn_args) - self.stage2 = self._make_stage( - n_modules=1, n_branches=2, n_blocks=n_blocks[:2], n_chnls=channels[:2] - ) - + self.stage2 = self._make_stage(n_modules=1, n_branches=2, n_blocks=n_blocks[:2], n_chnls=channels[:2]) + self.transition2 = TransitionBlock(channels[:2], channels[:3], **bn_args) - self.stage3 = self._make_stage( # 3 if small else 4 - n_modules=(4,3)[small], n_branches=3, n_blocks=n_blocks[:3], n_chnls=channels[:3] + self.stage3 = self._make_stage( # 3 if small else 4 + n_modules=(4, 3)[small], n_branches=3, n_blocks=n_blocks[:3], n_chnls=channels[:3] ) - + self.transition3 = TransitionBlock(channels[:3], channels, **bn_args) - self.stage4 = self._make_stage( # 2 if small else 3 - n_modules=(3,2)[small], n_branches=4, n_blocks=n_blocks, n_chnls=channels, + self.stage4 = self._make_stage( # 2 if small else 3 + n_modules=(3, 2)[small], n_branches=4, n_blocks=n_blocks, n_chnls=channels, ) - + self.encoder = encoder if encoder: self.forward = self.encoder_features @@ -276,16 +278,9 @@ def __init__( def _make_stage(self, n_modules, n_branches, n_blocks, n_chnls): modules = [] for i in range(n_modules): - modules.append( - HighResolutionModule( - n_branches, - n_blocks, - n_chnls, - **self.bn_args, - ) - ) + modules.append(HighResolutionModule(n_branches, n_blocks, n_chnls, **self.bn_args,)) return nn.Sequential(*modules) - + def encoder_features(self, x): # stem x = self.conv1(x) @@ -293,46 +288,46 @@ def encoder_features(self, x): x = self.conv2(x) x = self.bn2(x) x = self.layer1(x) - - x = self.transition1([x]) # x is actually a list now + + x = self.transition1([x]) # x is actually a list now x = self.stage2(x) - + x = self.transition2(x) x = self.stage3(x) - + x = self.transition3(x) x = self.stage4(x) - if self.encoder: # want to return from lowest resolution to highest + if self.encoder: # want to return from lowest resolution to highest x = [x[3], x[2], x[1], x[0], x[0]] return x - + def features(self, x): x = self.encoder_features(x) x = self.cls_head(x) return x - + def logits(self, x): x = self.global_pool(x) x = torch.flatten(x, 1) -# x = self.dropout(x) + # x = self.dropout(x) x = self.last_linear(x) return x - + def forward(self, x): x = self.features(x) x = self.logits(x) return x - + def load_state_dict(self, state_dict, **kwargs): self_keys = list(self.state_dict().keys()) sd_keys = list(state_dict.keys()) - sd_keys = [k for k in sd_keys if "num_batches_tracked" not in k] # filter + sd_keys = [k for k in sd_keys if "num_batches_tracked" not in k] # filter new_state_dict = {} for new_key, old_key in zip(self_keys, sd_keys): new_state_dict[new_key] = state_dict[old_key] super().load_state_dict(new_state_dict, **kwargs) - - + + # fmt: off CFGS = { "hrnet_w18_small": { @@ -368,9 +363,10 @@ def load_state_dict(self, state_dict, **kwargs): "imagenet": {"url": None}, }, } - + # fmt:on - + + def _hrnet(arch, pretrained=None, **kwargs): cfgs = deepcopy(CFGS) cfg_settings = cfgs[arch]["default"] @@ -420,7 +416,7 @@ def hrnet_w18_small(**kwargs): def hrnet_w18(**kwargs): r"""Constructs a 
HRNetv2-18 model.""" return _hrnet("hrnet_w18", **kwargs) - + @wraps(HighResolutionNet) @add_docs_for(HighResolutionNet) @@ -428,33 +424,37 @@ def hrnet_w30(**kwargs): r"""Constructs a HRNetv2-30 model.""" return _hrnet("hrnet_w30", **kwargs) + @wraps(HighResolutionNet) @add_docs_for(HighResolutionNet) def hrnet_w32(**kwargs): r"""Constructs a HRNetv2-32 model.""" return _hrnet("hrnet_w32", **kwargs) + @wraps(HighResolutionNet) @add_docs_for(HighResolutionNet) def hrnet_w40(**kwargs): r"""Constructs a HRNetv2-40 model.""" return _hrnet("hrnet_w40", **kwargs) + @wraps(HighResolutionNet) @add_docs_for(HighResolutionNet) def hrnet_w44(**kwargs): r"""Constructs a HRNetv2-44 model.""" return _hrnet("hrnet_w44", **kwargs) + @wraps(HighResolutionNet) @add_docs_for(HighResolutionNet) def hrnet_w48(**kwargs): r"""Constructs a HRNetv2-48 model.""" return _hrnet("hrnet_w48", **kwargs) + @wraps(HighResolutionNet) @add_docs_for(HighResolutionNet) def hrnet_w64(**kwargs): r"""Constructs a HRNetv2-64 model.""" return _hrnet("hrnet_w64", **kwargs) - diff --git a/pytorch_tools/models/resnet.py b/pytorch_tools/models/resnet.py index 0490cb6..bdc9e57 100644 --- a/pytorch_tools/models/resnet.py +++ b/pytorch_tools/models/resnet.py @@ -214,7 +214,7 @@ def _make_stem(self, stem_type, stem_width, in_channels, norm_layer, norm_act): # in the paper they use conv1x1 but in code conv3x3 (which seems better) self.conv1 = nn.Sequential(SpaceToDepth(), conv3x3(in_channels * 16, stem_width)) self.bn1 = norm_layer(stem_width, activation=norm_act) - self.maxpool = nn.Identity() # not used but needed for code compatability + self.maxpool = nn.Identity() # not used but needed for code compatability else: if stem_type == "deep": self.conv1 = nn.Sequential( @@ -225,7 +225,9 @@ def _make_stem(self, stem_type, stem_width, in_channels, norm_layer, norm_act): conv3x3(stem_width // 2, stem_width), ) else: - self.conv1 = nn.Conv2d(in_channels, stem_width, kernel_size=7, stride=2, padding=3, bias=False) + self.conv1 = nn.Conv2d( + in_channels, stem_width, kernel_size=7, stride=2, padding=3, bias=False + ) self.bn1 = norm_layer(stem_width, activation=norm_act) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) @@ -296,6 +298,7 @@ def keep_prob(self): self.block_idx += 1 return keep_prob + # fmt: off CFGS = { # RESNET MODELS diff --git a/pytorch_tools/models/tresnet.py b/pytorch_tools/models/tresnet.py index 022c9b0..9be0c36 100644 --- a/pytorch_tools/models/tresnet.py +++ b/pytorch_tools/models/tresnet.py @@ -19,6 +19,7 @@ # avoid overwriting doc string wraps = partial(wraps, assigned=("__module__", "__name__", "__qualname__", "__annotations__")) + class TResNet(ResNet): """TResNet M / TResNet L / XL @@ -71,13 +72,13 @@ def __init__( drop_rate=0.0, drop_connect_rate=0.0, ): - nn.Module.__init__(self) + nn.Module.__init__(self) stem_width = int(64 * width_factor) norm_layer = bn_from_name(norm_layer) self.inplanes = stem_width self.num_classes = num_classes - self.groups = 1 # not really used but needed inside _make_layer - self.base_width = 64 # used inside _make_layer + self.groups = 1 # not really used but needed inside _make_layer + self.base_width = 64 # used inside _make_layer self.norm_act = norm_act self.block_idx = 0 self.num_blocks = sum(layers) @@ -89,9 +90,9 @@ def __init__( raise ValueError("Output stride should be in [8, 16, 32]") # TODO add OS later # if output_stride == 8: - # stride_3, stride_4, dilation_3, dilation_4 = 1, 1, 2, 4 + # stride_3, stride_4, dilation_3, dilation_4 = 1, 1, 2, 4 # elif 
output_stride == 16: - # stride_3, stride_4, dilation_3, dilation_4 = 2, 1, 1, 2 + # stride_3, stride_4, dilation_3, dilation_4 = 2, 1, 1, 2 # elif output_stride == 32: stride_3, stride_4, dilation_3, dilation_4 = 2, 2, 1, 1 @@ -101,11 +102,15 @@ def __init__( self.layer1 = self._make_layer(stem_width, layers[0], stride=1, **largs) self.layer2 = self._make_layer(stem_width * 2, layers[1], stride=2, **largs) - self.block = TBottleneck # first 2 - Basic, last 2 - Bottleneck + self.block = TBottleneck # first 2 - Basic, last 2 - Bottleneck self.expansion = TBottleneck.expansion - self.layer3 = self._make_layer(stem_width * 4, layers[2], stride=stride_3, dilation=dilation_3, **largs) - largs.update(attn_type=None) # no se in last layer - self.layer4 = self._make_layer(stem_width * 8, layers[3], stride=stride_4, dilation=dilation_4, **largs) + self.layer3 = self._make_layer( + stem_width * 4, layers[2], stride=stride_3, dilation=dilation_3, **largs + ) + largs.update(attn_type=None) # no se in last layer + self.layer4 = self._make_layer( + stem_width * 8, layers[3], stride=stride_4, dilation=dilation_4, **largs + ) self.global_pool = FastGlobalAvgPool2d(flatten=True) self.num_features = stem_width * 8 * self.expansion self.encoder = encoder @@ -123,6 +128,7 @@ def load_state_dict(self, state_dict, **kwargs): state_dict.pop("last_linear.bias") nn.Module.load_state_dict(self, state_dict, **kwargs) + # fmt: off # images should be normalized to [0, 1] PRETRAIN_SETTINGS = { @@ -169,6 +175,7 @@ def load_state_dict(self, state_dict, **kwargs): } # fmt: on + def patch_bn(module): """changes weight from InplaceABN to be compatible with usual ABN""" if isinstance(module, ABN): @@ -176,6 +183,7 @@ def patch_bn(module): for m in module.children(): patch_bn(m) + def _resnet(arch, pretrained=None, **kwargs): cfgs = deepcopy(CFGS) cfg_settings = cfgs[arch]["default"] @@ -204,27 +212,32 @@ def _resnet(arch, pretrained=None, **kwargs): # if there is last_linear in state_dict, it's going to be overwritten state_dict["last_linear.weight"] = model.state_dict()["last_linear.weight"] state_dict["last_linear.bias"] = model.state_dict()["last_linear.bias"] - if kwargs.get("in_channels", 3) != 3: # support pretrained for custom input channels - state_dict["conv1.1.weight"] = repeat_channels(state_dict["conv1.1.weight"], kwargs["in_channels"] * 16, 3 * 16) + if kwargs.get("in_channels", 3) != 3: # support pretrained for custom input channels + state_dict["conv1.1.weight"] = repeat_channels( + state_dict["conv1.1.weight"], kwargs["in_channels"] * 16, 3 * 16 + ) model.load_state_dict(state_dict) patch_bn(model) setattr(model, "pretrained_settings", cfg_settings) return model + @wraps(TResNet) @add_docs_for(TResNet) def tresnetm(**kwargs): r"""Constructs a TResnetM model.""" return _resnet("tresnetm", **kwargs) + @wraps(TResNet) @add_docs_for(TResNet) def tresnetl(**kwargs): r"""Constructs a TResnetL model.""" return _resnet("tresnetl", **kwargs) + @wraps(TResNet) @add_docs_for(TResNet) def tresnetxl(**kwargs): r"""Constructs a TResnetXL model.""" - return _resnet("tresnetxl", **kwargs) \ No newline at end of file + return _resnet("tresnetxl", **kwargs) diff --git a/pytorch_tools/models/vgg.py b/pytorch_tools/models/vgg.py index b2037ce..0f17b85 100644 --- a/pytorch_tools/models/vgg.py +++ b/pytorch_tools/models/vgg.py @@ -90,7 +90,6 @@ def forward(self, x): x = self.logits(x) return x - def _make_layers(self, cfg): layers = [] in_channels = self.in_channels diff --git a/pytorch_tools/modules/activated_batch_norm.py 
b/pytorch_tools/modules/activated_batch_norm.py index f4e0786..586ae0d 100644 --- a/pytorch_tools/modules/activated_batch_norm.py +++ b/pytorch_tools/modules/activated_batch_norm.py @@ -46,7 +46,7 @@ def __init__( self.activation = ACT(activation) self.activation_param = activation_param self.frozen = frozen - + if frozen: self.register_buffer("weight", torch.ones(num_features)) self.register_buffer("bias", torch.zeros(num_features)) diff --git a/pytorch_tools/modules/tf_same_ops.py b/pytorch_tools/modules/tf_same_ops.py index 1c5adb6..cfb8026 100644 --- a/pytorch_tools/modules/tf_same_ops.py +++ b/pytorch_tools/modules/tf_same_ops.py @@ -5,6 +5,7 @@ import torch.nn.functional as F from torch.nn.modules.utils import _pair + def pad_same(x, k, s, d, value=0): # type: (Tensor, int, int, int, float)->Tensor # x - input tensor, s - stride, k - kernel_size, d - dilation @@ -15,26 +16,31 @@ def pad_same(x, k, s, d, value=0): x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2], value=value) return x -# current implementation is only for symmetric case. But there are no non symmetric cases + +# current implementation is only for symmetric case. But there are no non symmetric cases def conv2d_same(x, weight, bias=None, stride=(1, 1), dilation=(1, 1), groups=1): # type: (Tensor, Tensor, Optional[torch.Tensor], Tuple[int, int], Tuple[int, int], int)->Tensor x = pad_same(x, weight.shape[-1], stride[0], dilation[0]) return F.conv2d(x, weight, bias, stride, (0, 0), dilation, groups) + def maxpool2d_same(x, kernel_size, stride): # type: (Tensor, Tuple[int, int], Tuple[int, int])->Tensor - x = pad_same(x, kernel_size[0], stride[0], 1, value=-float('inf')) + x = pad_same(x, kernel_size[0], stride[0], 1, value=-float("inf")) return F.max_pool2d(x, kernel_size, stride, (0, 0)) + class Conv2dSamePadding(nn.Conv2d): """Assymetric padding matching TensorFlow `same`""" def forward(self, x): return conv2d_same(x, self.weight, self.bias, self.stride, self.dilation, self.groups) -# as of 1.5 there is no _pair in MaxPool. Remove when this is fixed + +# as of 1.5 there is no _pair in MaxPool. Remove when this is fixed class MaxPool2dSamePadding(nn.MaxPool2d): """Assymetric padding matching TensorFlow `same`""" + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.kernel_size = _pair(self.kernel_size) diff --git a/pytorch_tools/utils/box.py b/pytorch_tools/utils/box.py index 73bb377..16b349e 100644 --- a/pytorch_tools/utils/box.py +++ b/pytorch_tools/utils/box.py @@ -3,6 +3,7 @@ import numpy as np from functools import wraps + def box2delta(boxes, anchors): # type: (Tensor, Tensor)->Tensor """Convert boxes to deltas from anchors. Boxes are expected in 'ltrb' format @@ -14,9 +15,9 @@ def box2delta(boxes, anchors): offset_x, offset_y, scale_x, scale_y """ - anchors_wh = anchors[..., 2:] - anchors[..., :2] # + 1 + anchors_wh = anchors[..., 2:] - anchors[..., :2] anchors_ctr = anchors[..., :2] + 0.5 * anchors_wh - boxes_wh = boxes[..., 2:] - boxes[..., :2] # + 1 + boxes_wh = boxes[..., 2:] - boxes[..., :2] boxes_ctr = boxes[..., :2] + 0.5 * boxes_wh offset_delta = (boxes_ctr - anchors_ctr) / anchors_wh scale_delta = torch.log(boxes_wh / anchors_wh) @@ -40,7 +41,7 @@ def delta2box(deltas, anchors): # Value for clamping large dw and dh predictions. The heuristic is that we clamp # such that dw and dh are no larger than what would transform a 16px box into a # 1000px box (based on a small anchor, 16px, and a typical image size, 1000px). - SCALE_CLAMP = 4.135 # ~= np.log(1000. 
/ 16.) + SCALE_CLAMP = 4.135 # ~= np.log(1000. / 16.) deltas[..., 2:] = deltas[..., 2:].clamp(min=-SCALE_CLAMP, max=SCALE_CLAMP) pred_wh = deltas[..., 2:].exp() * anchors_wh @@ -53,6 +54,7 @@ def box_area(box): """ return (box[..., 2] - box[..., 0]) * (box[..., 3] - box[..., 1]) + def clip_bboxes(bboxes, size): """Args: bboxes (torch.Tensor): in `ltrb` format. Shape [N, 4] @@ -61,14 +63,15 @@ def clip_bboxes(bboxes, size): bboxes[:, 1::2] = bboxes[:, 1::2].clamp(0, size[0]) return bboxes + def clip_bboxes_batch(bboxes, size): # type: (Tensor, Tensor)->Tensor """Args: bboxes (torch.Tensor): in `ltrb` format. Shape [BS, N, 4] size (torch.Tensor): (H, W). Shape [BS, 2] """ size = size.to(bboxes) - h_size = size[..., 0].view(-1, 1, 1) #.float() - w_size = size[..., 1].view(-1, 1, 1) #.float() + h_size = size[..., 0].view(-1, 1, 1) # .float() + w_size = size[..., 1].view(-1, 1, 1) # .float() h_bboxes = bboxes[..., 1::2] w_bboxes = bboxes[..., 0::2] zeros = torch.zeros_like(h_bboxes) @@ -79,6 +82,7 @@ def clip_bboxes_batch(bboxes, size): # bboxes[:, 1::2] = bboxes[:, 1::2].clamp(0, size[0].item()) return bboxes + # implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py # with slight modifications def box_iou(boxes1, boxes2): @@ -107,13 +111,9 @@ def box_iou(boxes1, boxes2): # based on https://github.com/NVIDIA/retinanet-examples/ -# and on https://github.com/google/automl/ +# and on https://github.com/google/automl/ def generate_anchors_boxes( - image_size, - num_scales=3, - aspect_ratios=(1.0, 2.0, 0.5), - pyramid_levels=[3, 4, 5, 6, 7], - anchor_scale=4, + image_size, num_scales=3, aspect_ratios=(1.0, 2.0, 0.5), pyramid_levels=[3, 4, 5, 6, 7], anchor_scale=4, ): """Generates multiscale anchor boxes Minimum object size which could be detected is anchor_scale * 2**pyramid_levels[0]. By default it's 32px @@ -132,28 +132,28 @@ def generate_anchors_boxes( boxes are in 'ltrb' format num_anchors (int): number of anchors per location """ - + if isinstance(image_size, int): image_size = (image_size, image_size) scale_vals = [anchor_scale * 2 ** (i / num_scales) for i in range(num_scales)] # from lowest stride to largest. Anchors from models should be in the same order! - strides = [2**i for i in pyramid_levels] - + strides = [2 ** i for i in pyramid_levels] + # get offsets for anchor boxes for one pixel # can rewrite in pure Torch but using np is more convenient. 
This function usually should only be called once num_anchors = len(scale_vals) * len(aspect_ratios) ratio_vals_sq = np.sqrt(np.tile(aspect_ratios, len(scale_vals))) scale_vals = np.repeat(scale_vals, len(aspect_ratios))[:, np.newaxis] - wh = np.stack([np.ones(num_anchors) * ratio_vals_sq, np.ones(num_anchors) / ratio_vals_sq], axis=1) - lt = - 0.5 * wh * scale_vals + wh = np.stack([np.ones(num_anchors) * ratio_vals_sq, np.ones(num_anchors) / ratio_vals_sq], axis=1) + lt = -0.5 * wh * scale_vals rb = 0.5 * wh * scale_vals - base_offsets = torch.from_numpy(np.hstack([lt, rb])).float() # [num_anchors, 4] - base_offsets = base_offsets.view(-1, 1, 1, 4) # [num_anchors, 1, 1, 4] + base_offsets = torch.from_numpy(np.hstack([lt, rb])).float() # [num_anchors, 4] + base_offsets = base_offsets.view(-1, 1, 1, 4) # [num_anchors, 1, 1, 4] # generate anchor boxes for all given strides all_anchors = [] for stride in strides: y, x = torch.meshgrid([torch.arange(stride / 2, image_size[i], stride) for i in range(2)]) - xyxy = torch.stack((x, y, x, y), 2).unsqueeze(0) + xyxy = torch.stack((x, y, x, y), 2).unsqueeze(0) # permute to match TF EffDet anchors order after reshape anchors = (xyxy + base_offsets * stride).permute(1, 2, 0, 3).reshape(-1, 4) all_anchors.append(anchors) @@ -162,6 +162,7 @@ def generate_anchors_boxes( # clip_bboxes(all_anchors, image_size) return all_anchors, num_anchors + def generate_targets(anchors, batch_gt_boxes, num_classes, matched_iou=0.5, unmatched_iou=0.4): """Generate targets for regression and classification @@ -169,34 +170,35 @@ def generate_targets(anchors, batch_gt_boxes, num_classes, matched_iou=0.5, unma 1) IoU >= matched_iou: Highest similarity. Matched/Positive. Mask value is 1 2) matched_iou > IoU >= unmatched_iou: Medium similarity. Ignored. Mask value is -1 3) unmatched_iou > IoU: Lowest similarity. Unmatched/Negative. Mask value is 0 - + Args: anchors (torch.Tensor): all anchors on a single image. shape [N, 4] - batch_gt_boxes (torch.Tesor): all groud truth bounding boxes and classes for the batch. shape [BS, N, 5] - classes are expected to be in the last column. + batch_gt_boxes (torch.Tensor): all ground truth bounding boxes and classes for the batch. shape [BS, N, 5] + classes are expected to be in the last column. bboxes are in `ltrb` format! num_classes (int): number of classes. needed for one-hot encoding labels - matched_iou (float): + matched_iou (float): unmatched_iou (float): - + Returns: box_target, cls_target, matches_mask - + """ + def _generate_single_targets(gt_boxes): gt_boxes, gt_classes = gt_boxes.split(4, dim=1) overlap = box_iou(anchors, gt_boxes) - + # Keep best box per anchor overlap, indices = overlap.max(1) box_target = box2delta(gt_boxes[indices], anchors) - - # There are three types of anchors. + + # There are three types of anchors. 
# matched (with objects), unmatched (with background), and in between (which should be ignored) IGNORED_VALUE = -1 UNMATCHED_VALUE = 0 matches_mask = torch.ones_like(overlap) * IGNORED_VALUE - matches_mask[overlap < unmatched_iou] = UNMATCHED_VALUE # background + matches_mask[overlap < unmatched_iou] = UNMATCHED_VALUE # background matches_mask[overlap >= matched_iou] = 1 # Generate one-hot-encoded target classes @@ -206,11 +208,11 @@ def _generate_single_targets(gt_boxes): gt_classes = gt_classes[indices].long() gt_classes[overlap < unmatched_iou] = num_classes # background has no class cls_target.scatter_(1, gt_classes, 1) - cls_target = cls_target[:, :num_classes] # remove background class from one-hot + cls_target = cls_target[:, :num_classes] # remove background class from one-hot return cls_target, box_target, matches_mask - - anchors = anchors.to(batch_gt_boxes) # change device & type if needed + + anchors = anchors.to(batch_gt_boxes) # change device & type if needed batch_results = ([], [], []) for single_gt_boxes in batch_gt_boxes: single_target_results = _generate_single_targets(single_gt_boxes) @@ -218,7 +220,8 @@ def _generate_single_targets(gt_boxes): batch_res.append(single_res) b_cls_target, b_box_target, b_matches_mask = [torch.stack(targets) for targets in batch_results] return b_cls_target, b_box_target, b_matches_mask - + + # copied from torchvision def batched_nms(boxes, scores, idxs, iou_threshold): # type: (Tensor, Tensor, Tensor, float)->Tensor @@ -257,9 +260,10 @@ def batched_nms(boxes, scores, idxs, iou_threshold): keep = torch.ops.torchvision.nms(boxes_for_nms, scores, iou_threshold) return keep + # jit actually makes it slower for fp16 and results are different! # FIXME: check it after 1.6 release. maybe they will fix JIT by that time -# @torch.jit.script +# @torch.jit.script def decode( batch_cls_head, batch_box_head, @@ -274,8 +278,8 @@ def decode( # type: (Tensor, Tensor, Tensor, Tensor, Tensor, float, int, int, float)->Tensor """ Decodes raw outputs of a model for easy visualization of bboxes - - Args: + + Args: batch_cls_head (torch.Tensor): shape [BS, *, NUM_CLASSES] batch_box_head (torch.Tensor): shape [BS, *, 4] anchors (torch.Tensor): shape [*, 4] @@ -285,25 +289,27 @@ def decode( max_detection_points (int): Maximum number of bboxes to consider for NMS for one image max_detection_per_image (int): Maximum number of bboxes to return per image iou_threshold (float): iou_threshold for Non Maximum Supression - + Returns: torch.Tensor with bboxes, scores and classes shape [BS, MAX_DETECTION_PER_IMAGE, 6]. bboxes in 'ltrb' format. If img_shape is not given they are NOT CLIPPED (!) 
""" - + batch_size = batch_cls_head.size(0) num_classes = batch_cls_head.size(-1) - anchors = anchors.to(batch_cls_head).unsqueeze(0).expand(batch_size, -1, -1) # [N, 4] -> [BS, N, 4] + anchors = anchors.to(batch_cls_head).unsqueeze(0).expand(batch_size, -1, -1) # [N, 4] -> [BS, N, 4] # it has to be raw logits but check anyway to avoid applying sigmoid twice if batch_cls_head.min() < 0 or batch_cls_head.max() > 1: batch_cls_head = batch_cls_head.sigmoid() - - # It's much faster to calculate topk once for full batch here rather than doing it inside loop + + # It's much faster to calculate topk once for full batch here rather than doing it inside loop # In TF The same bbox may belong to two different objects # select `max_detection_points` scores and corresponding bboxes - scores_topk_all, cls_topk_indices_all = torch.topk(batch_cls_head.view(batch_size, -1), k=max_detection_points) + scores_topk_all, cls_topk_indices_all = torch.topk( + batch_cls_head.view(batch_size, -1), k=max_detection_points + ) indices_all = cls_topk_indices_all / num_classes classes_all = cls_topk_indices_all % num_classes @@ -322,32 +328,29 @@ def decode( out_classes = torch.zeros((batch_size, max_detection_per_image)).to(batch_cls_head) for batch in range(batch_size): - scores_topk = scores_topk_all[batch] # , cls_topk_indices_all[batch] - classes = classes_all[batch] #cls_topk_indices % num_classes - box_topk = box_topk_all[batch] # torch.gather(batch_box_head[batch], 0, indices) - anchor_topk = anchors_topk_all[batch] - regressed_boxes = regressed_boxes_all[batch] # delta2box(box_topk, anchor_topk) + scores_topk = scores_topk_all[batch] # , cls_topk_indices_all[batch] + classes = classes_all[batch] # cls_topk_indices % num_classes + regressed_boxes = regressed_boxes_all[batch] # delta2box(box_topk, anchor_topk) # apply NMS nms_idx = batched_nms(regressed_boxes, scores_topk, classes, iou_threshold) - nms_idx = nms_idx[:min(len(nms_idx), max_detection_per_image)] + nms_idx = nms_idx[: min(len(nms_idx), max_detection_per_image)] # select suppressed bboxes im_scores = scores_topk[nms_idx] im_classes = classes[nms_idx] im_bboxes = regressed_boxes[nms_idx] im_classes += 1 # back to class idx with background class = 0 - out_scores[batch, :im_scores.size(0)] = im_scores - out_classes[batch, :im_classes.size(0)] = im_classes - out_boxes[batch, :im_bboxes.size(0)] = im_bboxes + out_scores[batch, : im_scores.size(0)] = im_scores + out_classes[batch, : im_classes.size(0)] = im_classes + out_boxes[batch, : im_bboxes.size(0)] = im_bboxes # no need to pad because it's already padded with 0's - ## old way ## # get regressed bboxes # all_img_bboxes = delta2box(batch_box_head[batch], anchors) # if img_shape: # maybe clip - # all_img_bboxes = clip_bboxes(all_img_bboxes, img_shape) + # all_img_bboxes = clip_bboxes(all_img_bboxes, img_shape) # select at most `top_n` bboxes and from them select with score > threshold # max_cls_score, max_cls_idx = batch_cls_head[batch].max(1) # top_cls_score, top_cls_idx = max_cls_score.topk(top_n) @@ -356,15 +359,15 @@ def decode( # im_scores = max_cls_score[top_cls_idx] # im_classes = max_cls_idx[top_cls_idx] # im_bboxes = all_img_bboxes[top_cls_idx] - + # apply NMS # nms_idx = batched_nms(im_bboxes, im_scores, im_classes, iou_threshold) # im_scores = im_scores[nms_idx] # im_classes = im_classes[nms_idx] # im_bboxes = im_bboxes[nms_idx] - + # out_scores[batch, :im_scores.size(0)] = im_scores # out_classes[batch, :im_classes.size(0)] = im_classes # out_boxes[batch, :im_bboxes.size(0)] = im_bboxes - - 
return torch.cat([out_boxes, out_scores.unsqueeze(-1), out_classes.unsqueeze(-1)], dim=2) \ No newline at end of file + + return torch.cat([out_boxes, out_scores.unsqueeze(-1), out_classes.unsqueeze(-1)], dim=2) diff --git a/pytorch_tools/utils/misc.py b/pytorch_tools/utils/misc.py index cf1fc85..733a22e 100644 --- a/pytorch_tools/utils/misc.py +++ b/pytorch_tools/utils/misc.py @@ -5,10 +5,10 @@ import random import collections import numpy as np +from functools import partial import torch.nn as nn import torch.nn.functional as F import torch.distributed as dist -from functools import partial def initialize_fn(m): @@ -27,10 +27,12 @@ def initialize_fn(m): nn.init.kaiming_uniform_(m.weight, mode="fan_out", nonlinearity="linear") nn.init.constant_(m.bias, 0) + def initialize(module): for m in module.modules(): initialize_fn(m) + def initialize_iterator(module_iterator): for m in module_iterator: initialize_fn(m) @@ -219,6 +221,7 @@ def make_divisible(v, divisor=8): new_v += divisor return new_v + def repeat_channels(conv_weights, new_channels, old_channels=3): """Repeat channels to match new number of input channels Args: @@ -228,5 +231,5 @@ def repeat_channels(conv_weights, new_channels, old_channels=3): """ rep_times = math.ceil(new_channels / old_channels) new_weights = conv_weights.repeat(1, rep_times, 1, 1)[:, :new_channels, :, :] - new_weights *= old_channels / new_channels # to keep the same output amplitude - return new_weights \ No newline at end of file + new_weights *= old_channels / new_channels # to keep the same output amplitude + return new_weights diff --git a/tests/detection_models/test_det_models.py b/tests/detection_models/test_det_models.py index 62ebb13..0ab1ff2 100644 --- a/tests/detection_models/test_det_models.py +++ b/tests/detection_models/test_det_models.py @@ -51,7 +51,7 @@ def test_coco_pretrain(arch): im = np.array(im.resize((inp_size, inp_size))) im_t = tensor_from_rgb_image(preprocess_fn(im)).unsqueeze(0).float().cuda() boxes_scores_classes = m.predict(im_t) - # check that most confident bbox is close to correct class. The reason for such strange test is + # check that most confident bbox is close to correct class. 
The reason for such strange test is # because in different models class mappings are shifted by +- 1 assert (boxes_scores_classes[0, 0, 5] - im_cls) < 2 @@ -61,6 +61,7 @@ def test_pretrain_custom_num_classes(arch): m = pt_det.__dict__[arch](pretrained="coco", num_classes=80).eval().cuda() _test_forward(m) + @pytest.mark.parametrize("arch", MODEL_NAMES[:2]) def test_encoder_frozenabn(arch): m = pt_det.__dict__[arch](encoder_norm_layer="frozenabn").eval().cuda() diff --git a/tests/losses/test_losses.py b/tests/losses/test_losses.py index cf3090e..2f7a741 100644 --- a/tests/losses/test_losses.py +++ b/tests/losses/test_losses.py @@ -52,9 +52,7 @@ def test_focal_loss_fn_basic(): @pytest.mark.parametrize("reduction", ["sum", "mean", "none"]) def test_focal_loss_fn_reduction(reduction): - torch_ce = F.binary_cross_entropy_with_logits( - INP_BINARY, TARGET_BINARY.float(), reduction=reduction - ) + torch_ce = F.binary_cross_entropy_with_logits(INP_BINARY, TARGET_BINARY.float(), reduction=reduction) my_ce = pt_F.focal_loss_with_logits(INP_BINARY, TARGET_BINARY, alpha=0.5, gamma=0, reduction=reduction) assert torch.allclose(torch_ce, my_ce * 2) @@ -108,6 +106,7 @@ def test_focal_loss(): fl_i = losses.FocalLoss(mode="binary", reduction="sum", ignore_label=-100)(INP_IMG_BINARY, y_true) assert torch.allclose(fl.sum() - loss_diff, fl_i) + @pytest.mark.parametrize( ["y_true", "y_pred", "expected"], [ @@ -333,9 +332,7 @@ def test_binary_cross_entropy(reduction): assert torch.allclose(torch_ce, my_ce) # test for images - torch_ce = F.binary_cross_entropy_with_logits( - INP_IMG_BINARY, TARGET_IMG_BINARY, reduction=reduction - ) + torch_ce = F.binary_cross_entropy_with_logits(INP_IMG_BINARY, TARGET_IMG_BINARY, reduction=reduction) my_ce = my_ce_loss(INP_IMG_BINARY, TARGET_IMG_BINARY) assert torch.allclose(torch_ce, my_ce) @@ -391,4 +388,4 @@ def test_binary_hinge(): @pytest.mark.parametrize("reduction", ["sum", "mean", "none"]) def test_smoothl1(reduction): loss_my = losses.SmoothL1Loss(delta=1, reduction=reduction)(INP, TARGET_MULTILABEL) - loss_torch = F.smooth_l1_loss(INP, TARGET_MULTILABEL, reduction=reduction) \ No newline at end of file + loss_torch = F.smooth_l1_loss(INP, TARGET_MULTILABEL, reduction=reduction) diff --git a/tests/models/test_models.py b/tests/models/test_models.py index d5fb396..0042a85 100644 --- a/tests/models/test_models.py +++ b/tests/models/test_models.py @@ -23,7 +23,14 @@ HRNET_NAMES = [name for name in ALL_MODEL_NAMES if "hrnet" in name] # test only part of the models -TEST_MODEL_NAMES = DENSENET_NAMES[:1] + EFFNET_NAMES[:1] + VGG_NAMES[:1] + RESNET_NAMES[:1] + TRESNET_NAMES[:1] + HRNET_NAMES[:1] +TEST_MODEL_NAMES = ( + DENSENET_NAMES[:1] + + EFFNET_NAMES[:1] + + VGG_NAMES[:1] + + RESNET_NAMES[:1] + + TRESNET_NAMES[:1] + + HRNET_NAMES[:1] +) # TEST_MODEL_NAMES = HRNET_NAMES[:1] INP = torch.ones(2, 3, 128, 128) @@ -52,6 +59,7 @@ def test_custom_in_channels(arch): with torch.no_grad(): m(torch.ones(2, 5, 128, 128)) + @pytest.mark.parametrize("arch", EFFNET_NAMES[:2] + RESNET_NAMES[:2]) def test_pretrained_custom_in_channels(arch): m = models.__dict__[arch](in_channels=5, pretrained="imagenet") @@ -82,11 +90,13 @@ def test_dilation(arch, output_stride): W, H = INP.shape[-2:] assert res.shape[-2:] == (W // output_stride, H // output_stride) + @pytest.mark.parametrize("arch", EFFNET_NAMES[:2] + RESNET_NAMES[:2]) def test_drop_connect(arch): m = models.__dict__[arch](drop_connect_rate=0.2) _test_forward(m) + NUM_PARAMS = { "tresnetm": 31389032, "tresnetl": 55989256, @@ -96,13 
+106,16 @@ def test_drop_connect(arch): "efficientnet_b2": 9109994, "efficientnet_b3": 12233232, } -@pytest.mark.parametrize('name_num_params', zip(NUM_PARAMS.items())) + + +@pytest.mark.parametrize("name_num_params", zip(NUM_PARAMS.items())) def test_num_parameters(name_num_params): name, num_params = name_num_params[0] m = models.__dict__[name]() assert pt.utils.misc.count_parameters(m)[0] == num_params -@pytest.mark.parametrize('stem_type', ["", "deep", "space2depth"]) + +@pytest.mark.parametrize("stem_type", ["", "deep", "space2depth"]) def test_resnet_stem_type(stem_type): m = models.resnet50(stem_type=stem_type) - _test_forward(m) \ No newline at end of file + _test_forward(m) diff --git a/tests/models/test_weights.py b/tests/models/test_weights.py index c22c173..c68cd72 100644 --- a/tests/models/test_weights.py +++ b/tests/models/test_weights.py @@ -55,6 +55,7 @@ def test_imagenet_pretrain(arch): pred_cls = m(im).argmax() assert pred_cls == im_cls + # test that output mean for fixed input is the same MODEL_NAMES2 = [ "resnet34", @@ -68,6 +69,7 @@ def test_imagenet_pretrain(arch): "efficientnet_b0": 0.0070, } + @pytest.mark.parametrize("arch", MODEL_NAMES2) def test_output_mean(arch): m = models.__dict__[arch](pretrained="imagenet") diff --git a/tests/modules/test_modules.py b/tests/modules/test_modules.py index 969adc0..1be5837 100644 --- a/tests/modules/test_modules.py +++ b/tests/modules/test_modules.py @@ -13,12 +13,14 @@ def test_activations_init(activation): res = act(inp) assert res.mean() + def test_frozen_abn(): l = modules.bn_from_name("frozen_abn")(10) assert list(l.parameters()) == [] l = modules.ABN(10, frozen=True) assert list(l.parameters()) == [] + # need to test and resnet and vgg because in resnet there are no Convs with bias # and in VGG there are no Convs without bias @pytest.mark.parametrize("norm_layer", ["abn", "agn"]) diff --git a/tests/segmentation_models/test_segm_models.py b/tests/segmentation_models/test_segm_models.py index d454815..cb6378d 100644 --- a/tests/segmentation_models/test_segm_models.py +++ b/tests/segmentation_models/test_segm_models.py @@ -7,12 +7,13 @@ INP = torch.ones(2, 3, 64, 64) ENCODERS = ["resnet34", "se_resnet50", "efficientnet_b1", "densenet121"] -SEGM_ARCHS = [pt_sm.Unet, pt_sm.Linknet, pt_sm.DeepLabV3, pt_sm.SegmentationFPN, pt_sm.SegmentationBiFPN] +SEGM_ARCHS = [pt_sm.Unet, pt_sm.Linknet, pt_sm.DeepLabV3, pt_sm.SegmentationFPN] # pt_sm.SegmentationBiFPN # this lines are usefull for quick tests # ENCODERS = ["se_resnet50"] # SEGM_ARCHS = [pt_sm.SegmentationFPN, pt_sm.SegmentationFPN] + def _test_forward(model): with torch.no_grad(): return model(INP) @@ -47,21 +48,24 @@ def test_num_classes(encoder_name, model_class): out = _test_forward(m) assert out.size(1) == 5 + @pytest.mark.parametrize("encoder_name", ENCODERS) @pytest.mark.parametrize("model_class", SEGM_ARCHS) def test_drop_rate(encoder_name, model_class): m = model_class(encoder_name=encoder_name, drop_rate=0.2) _test_forward(m) + @pytest.mark.parametrize("encoder_name", ENCODERS) @pytest.mark.parametrize("model_class", [pt_sm.DeepLabV3]) # pt_sm.Unet, pt_sm.Linknet @pytest.mark.parametrize("output_stride", [32, 16, 8]) def test_dilation(encoder_name, model_class, output_stride): if output_stride == 8 and model_class != pt_sm.DeepLabV3: - return None # OS=8 only supported for Deeplab + return None # OS=8 only supported for Deeplab m = model_class(encoder_name=encoder_name, output_stride=output_stride) _test_forward(m) + @pytest.mark.parametrize("model_class", 
[pt_sm.DeepLabV3, pt_sm.SegmentationFPN]) def test_deeplab_last_upsample(model_class): m = model_class(last_upsample=True) @@ -74,7 +78,8 @@ def test_deeplab_last_upsample(model_class): # should be 4 times smaller assert tuple(out.shape[-2:]) == (W // 4, H // 4) + @pytest.mark.parametrize("merge_policy", ["add", "cat"]) def test_merge_policy(merge_policy): m = pt_sm.SegmentationFPN(merge_policy=merge_policy) - _test_forward(m) \ No newline at end of file + _test_forward(m) diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index 13b4f23..e3257f4 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -2,18 +2,23 @@ import pytest import pytorch_tools as pt + def random_boxes(mean_box, stdev, N): return torch.rand(N, 4) * stdev + torch.tensor(mean_box, dtype=torch.float) + +# fmt: off DEVICE_DTYPE = [ ("cpu", torch.float), ("cuda", torch.float), ("cuda", torch.half) ] +# fmt: on # check that it works for all combinations of dtype and device @pytest.mark.parametrize("device_dtype", DEVICE_DTYPE) def test_clip_bboxes(device_dtype): device, dtype = device_dtype + # fmt: off bboxes = torch.tensor( [ [-5, -10, 50, 100], @@ -30,6 +35,7 @@ def test_clip_bboxes(device_dtype): device=device, dtype=dtype, ) + # fmt: on size = (60, 40) # test single bbox clip res1 = pt.utils.box.clip_bboxes(bboxes, size) @@ -55,9 +61,11 @@ def test_clip_bboxes(device_dtype): res5 = jit_clip(batch_bboxes.clone(), batch_sizes) assert torch.allclose(res5, batch_expected) + @pytest.mark.parametrize("device_dtype", DEVICE_DTYPE) def test_delta2box(device_dtype): device, dtype = device_dtype + # fmt: off anchors = torch.tensor( [ [ 0., 0., 1., 1.], @@ -84,12 +92,13 @@ def test_delta2box(device_dtype): [0.0000, 0.0000, 1.0000, 1.0000], [0.1409, 0.1409, 2.8591, 2.8591], [-3.1945, 0.3161, 4.1945, 0.6839], - [5.0000, 5.0000, 5.0000, 5.0000] + [5.0000, 5.0000, 5.0000, 5.0000], ], device=device, dtype=dtype, ) - res1 = pt.utils.box.delta2box(deltas, anchors) + # fmt: on + res1 = pt.utils.box.delta2box(deltas, anchors) assert torch.allclose(res1, expected_res, atol=3e-4) BS = 4 @@ -97,8 +106,8 @@ def test_delta2box(device_dtype): batch_deltas = deltas.unsqueeze(0).expand(BS, -1, -1) batch_expected = expected_res.unsqueeze(0).expand(BS, -1, -1) - # test applying to batch - res2 = pt.utils.box.delta2box(batch_deltas.clone(), batch_anchors) + # test applying to batch + res2 = pt.utils.box.delta2box(batch_deltas.clone(), batch_anchors) assert torch.allclose(res2, batch_expected, atol=3e-4) # check that function is JIT script friendly @@ -106,6 +115,7 @@ def test_delta2box(device_dtype): res3 = jit_func(batch_deltas.clone(), batch_anchors) assert torch.allclose(res3, batch_expected, atol=3e-4) + @pytest.mark.parametrize("device_dtype", DEVICE_DTYPE) def test_box2delta(device_dtype): ## this test only checks that encoding and decoding gives the same result @@ -114,12 +124,12 @@ def test_box2delta(device_dtype): anchors = random_boxes([10, 10, 20, 20], 10, 10).to(device).to(dtype) deltas = pt.utils.box.box2delta(boxes, anchors) boxes_reconstructed = pt.utils.box.delta2box(deltas, anchors) - atol = 2e-2 if dtype == torch.half else 1e-6 # for fp16 sometimes error is large - assert torch.allclose(boxes, boxes_reconstructed, atol=atol) + atol = 2e-2 if dtype == torch.half else 1e-6 # for fp16 sometimes error is large + assert torch.allclose(boxes, boxes_reconstructed, atol=atol) # check that it's jit friendly jit_box2delta = torch.jit.script(pt.utils.box.box2delta) jit_delta2box = 
torch.jit.script(pt.utils.box.delta2box)
     deltas2 = jit_box2delta(boxes, anchors)
     boxes_reconstructed2 = jit_delta2box(deltas2, anchors)
-    assert torch.allclose(boxes, boxes_reconstructed2, atol=atol)
\ No newline at end of file
+    assert torch.allclose(boxes, boxes_reconstructed2, atol=atol)
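As a quick illustration of the box utilities exercised by the tests above, here is a sketch with invented coordinates; it assumes only the call signatures visible in this diff (`box2delta(boxes, anchors)`, `delta2box(deltas, anchors)`, `clip_bboxes(bboxes, size)`). Encoding followed by decoding should reproduce the input boxes up to floating-point tolerance, which is what `test_box2delta` verifies.

import torch
import pytorch_tools as pt

# four-coordinate corner boxes; values are made up for the example
anchors = torch.tensor([[0.0, 0.0, 10.0, 10.0], [5.0, 5.0, 15.0, 15.0]])
boxes = torch.tensor([[1.0, 2.0, 9.0, 11.0], [4.0, 4.0, 16.0, 14.0]])

deltas = pt.utils.box.box2delta(boxes, anchors)     # encode boxes relative to their anchors
restored = pt.utils.box.delta2box(deltas, anchors)  # decode back to absolute coordinates
assert torch.allclose(boxes, restored, atol=1e-5)   # round trip is (nearly) exact in fp32

# clip_bboxes clamps boxes so they stay inside an image of the given size
clipped = pt.utils.box.clip_bboxes(restored, (12, 12))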
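The `decode` hunk in `pytorch_tools/utils/box.py` above leans on one compact trick: class scores for all anchors are flattened to `[BS, num_anchors * num_classes]`, top-k is computed once for the whole batch, and the anchor index and class id are recovered with division and modulo. A toy restatement of just that step (shapes and names invented; `//` is used for the index arithmetic here, since on recent PyTorch `/` between integer tensors returns floats, whereas the original `/` relied on older integer-division behavior):

import torch

batch_size, num_anchors, num_classes, k = 2, 6, 3, 4
cls_head = torch.rand(batch_size, num_anchors, num_classes)             # raw per-anchor class scores

scores_topk, flat_idx = torch.topk(cls_head.view(batch_size, -1), k=k)  # single topk for the full batch
anchor_idx = flat_idx // num_classes                                    # which anchor each hit came from
class_idx = flat_idx % num_classes                                      # which class each hit belongs to

# sanity check: indexing by the recovered (anchor, class) pairs reproduces the top-k scores
assert torch.allclose(cls_head[0, anchor_idx[0], class_idx[0]], scores_topk[0])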
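Likewise, `repeat_channels` from `pytorch_tools/utils/misc.py` (only reformatted in this patch) tiles a pretrained first-convolution weight across extra input channels and rescales it by `old_channels / new_channels` so the expected activation magnitude stays roughly unchanged. Restated on a toy tensor with invented shapes:

import math
import torch

old_c, new_c = 3, 5
w3 = torch.randn(8, old_c, 3, 3)                            # pretrained conv weight for 3 input channels
rep = math.ceil(new_c / old_c)                              # tile enough copies, then trim to new_c channels
w5 = w3.repeat(1, rep, 1, 1)[:, :new_c] * (old_c / new_c)   # rescale to keep output amplitude comparable
assert w5.shape == (8, new_c, 3, 3)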