-
Notifications
You must be signed in to change notification settings - Fork 17
/
RGBBranch.py
95 lines (80 loc) · 2.86 KB
/
RGBBranch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import torch.nn as nn
from torchvision.models import resnet
class RGBBranch(nn.Module):
    """
    RGB-only scene recognition network.

    The model is made of a small convolutional stem (``in_block``), the four
    residual stages of a torchvision ResNet (``encoder1`` .. ``encoder4``) and
    a classification head (global average pooling, dropout and one linear
    layer producing ``scene_classes`` scores).
    """

    def __init__(self, arch, scene_classes=1055):
        """
        Build the network.

        :param arch: Backbone name, either ``'ResNet-18'`` or ``'ResNet-50'``
        :param scene_classes: Number of output scene classes
        :raises ValueError: If ``arch`` is not one of the supported backbones
        """
        super(RGBBranch, self).__init__()

        # --------------------------------#
        #          Base Network           #
        # ------------------------------- #
        if arch == 'ResNet-18':
            # ResNet-18 Network
            base = resnet.resnet18(pretrained=True)
            # Number of channels produced by the last stage of ResNet-18
            size_fc_RGB = 512
        elif arch == 'ResNet-50':
            # ResNet-50 Network
            base = resnet.resnet50(pretrained=True)
            # Number of channels produced by the last stage of ResNet-50
            size_fc_RGB = 2048
        else:
            # Fail early with a readable message instead of an
            # UnboundLocalError on `base` further down.
            raise ValueError(
                "Unsupported arch {!r}: expected 'ResNet-18' or 'ResNet-50'".format(arch)
            )

        # --------------------------------#
        #           RGB Branch            #
        # ------------------------------- #
        # First initial block.
        # NOTE(review): these layers are created from scratch, so the stem
        # does not reuse base.conv1 / base.bn1 and therefore does not carry
        # the backbone's pretrained weights -- confirm this is intentional.
        # The max-pooling layer returns (output, indices), which makes the
        # whole Sequential return a tuple.
        self.in_block = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1, return_indices=True)
        )

        # Encoder: residual stages taken from the pretrained backbone
        self.encoder1 = base.layer1
        self.encoder2 = base.layer2
        self.encoder3 = base.layer3
        self.encoder4 = base.layer4

        # -------------------------------------#
        #            RGB Classifier            #
        # ------------------------------------ #
        self.dropout = nn.Dropout(0.3)
        # Global average pooling. Equivalent to AvgPool2d(7, stride=1) on a
        # 7x7 feature map, but also valid for any other spatial size, so the
        # flattened vector always has `size_fc_RGB` features.
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(size_fc_RGB, scene_classes)

        # Loss
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x, sem=None):
        """
        Network forward pass.

        :param x: RGB image batch
        :param sem: Unused by this RGB-only branch; accepted (and optional)
            so the call signature stays compatible with existing callers
        :return: Tuple ``(act, e4, act_rgb, act_sem)`` where ``act`` holds the
            scene recognition predictions, ``e4`` is the output of the last
            encoder stage, and ``act_rgb`` / ``act_sem`` are both the same
            tensor as ``act``
        """
        # --------------------------------#
        #           RGB Branch            #
        # ------------------------------- #
        # The stem also yields the max-pooling indices, which are not needed.
        x, _ = self.in_block(x)
        e1 = self.encoder1(x)
        e2 = self.encoder2(e1)
        e3 = self.encoder3(e2)
        e4 = self.encoder4(e3)

        # -------------------------------------#
        #            RGB Classifier            #
        # ------------------------------------ #
        act = self.avgpool(e4)
        # Flatten everything but the batch dimension
        act = act.view(act.size(0), -1)
        act = self.dropout(act)
        act = self.fc(act)

        # There is a single branch, so the per-branch outputs are simply
        # aliases of the final prediction.
        return act, e4, act, act

    def loss(self, x, target):
        """
        Compute the classification loss.

        :param x: Predictions obtained by the network
        :param target: Ground-truth scene recognition labels
        :return: Loss value
        """
        # Check inputs: one label per prediction
        assert x.shape[0] == target.shape[0], \
            "Predictions and targets must have the same batch size"

        # Classification loss (labels are cast to integer class indices)
        return self.criterion(x, target.long())