# inference_MSA.py
# Forked from zylo117/Yet-Another-EfficientDet-Pytorch
"""
EfficientPose (c) by Steinbeis GmbH & Co. KG für Technologietransfer
Haus der Wirtschaft, Willi-Bleicher-Straße 19, 70174 Stuttgart, Germany
Yannick Bukschat: [email protected]
Marcus Vetter: [email protected]
EfficientPose is licensed under a
Creative Commons Attribution-NonCommercial 4.0 International License.
The license can be found in the LICENSE file in the root directory of this source tree
or at http://creativecommons.org/licenses/by-nc/4.0/.
---------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------
Based on:
Keras EfficientDet implementation (https://github.com/xuannianz/EfficientDet) licensed under the Apache License, Version 2.0
---------------------------------------------------------------------------------------------------------------------------------
The official EfficientDet implementation (https://github.com/google/automl) licensed under the Apache License, Version 2.0
---------------------------------------------------------------------------------------------------------------------------------
"""
import cv2
import numpy as np
import os,math,time
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.backends import cudnn
from efficientdet.model import FilterDetections
from backbone import EfficientPoseBackbone_WMSA
#from efficientdet.utils import BBoxTransform, ClipBoxes
#from utils.utils import STANDARD_COLORS, standard_to_bgr, get_index_label, plot_one_box,postprocess_det
from utils.utils import preprocess_pose, postprocess_pose, postprocess_pose_org, get_linemod_camera_matrix, get_linemod_3d_bboxes
from utils.visualization import draw_detections
from torch.profiler import profile, record_function, ProfilerActivity
# ---------------------------------------------------------------------------
# Script-wide inference settings
# ---------------------------------------------------------------------------

# Compound coefficient (phi); it is passed to the backbone constructor and is
# part of the weights file name.
compound_coef = 0
# Set to None to use the default input size.
force_input_size = None

# Score cut-off handed to FilterDetections and to the pose post-processing.
score_threshold = 0.01
# Threshold handed to the non-maximum suppression inside FilterDetections.
nms_threshold = 0.01

# Move the model and the input batches to the GPU when True.
use_cuda = True
# Cast the model and the image batch to float16 when True.
use_float16 = False

# cuDNN switches.
cudnn.benchmark = True  # type: ignore
cudnn.fastest = True  # type: ignore
class ModelWithFilterDet(nn.Module):
    """Wrap a pose network together with its detection filtering step.

    The wrapped ``model`` is run on the inputs, a sigmoid is applied to its
    classification output, and the result is handed to ``FilterDetections``
    (``nms=True``, ``class_specific_filter=False``, ``max_detections=100``).
    The module-level ``nms_threshold`` and ``score_threshold`` are read when
    the wrapper is constructed.

    Args:
        model: Network whose forward pass returns the 7-tuple
            ``(features, regression, classification, translation,
            rotation, anchors, bboxes)``.
    """

    def __init__(self, model):
        super().__init__()
        self.model = model
        self.filter_det = FilterDetections(
            num_rotation_parameters=3,
            num_translation_parameters=3,
            nms=True,
            class_specific_filter=False,
            nms_threshold=nms_threshold,
            score_threshold=score_threshold,
            max_detections=100,
        )

    def forward(self, inputs):
        """Run the network and return the filtered detections as NumPy arrays.

        Args:
            inputs: Passed unchanged to the wrapped model.

        Returns:
            Tuple ``(boxes, scores, labels, rotation, translation)`` of
            NumPy arrays.
        """
        # Only four of the seven network outputs are needed for filtering.
        _, _, classification, translation, rotation, _, bboxes = self.model(inputs)
        classification = torch.sigmoid(classification)
        boxes, scores, labels, rotation, translation = self.filter_det(
            [bboxes, classification, translation, rotation]
        )
        # detach() first: Tensor.numpy() refuses tensors that still require
        # grad, so this keeps forward() usable outside torch.no_grad().
        return tuple(
            tensor.detach().cpu().numpy()
            for tensor in (boxes, scores, labels, rotation, translation)
        )
def _stack_inputs(arrays, device):
    """Move each NumPy array to ``device`` and concatenate them along axis 0."""
    return torch.cat([torch.from_numpy(array).to(device) for array in arrays], 0)


def main():
    """Run pose inference on a folder of Linemod images and save the results.

    Steps:
      1. Collect up to ``max_images`` image files from ``path_to_images``.
      2. Build the network, load the weights and wrap it with
         ``ModelWithFilterDet``.
      3. Preprocess every readable image.
      4. Run the images through the model batch by batch, each batch under
         the PyTorch profiler, and print the profiler table.
      5. Post-process, draw the detections on the original image and either
         save it to ``save_path`` or display it when ``save_path`` is None.
    """
    # ---- input parameters -------------------------------------------------
    path_to_images = "../datasets/Linemod_preprocessed/data/08/rgb/"
    image_extension = ".png"
    path_to_weights = f'weights/trained_MSA/obj_8/efficientpose-d{compound_coef}_linemod_obj8_one_best_train.pth'
    batch_size = 1
    # Where to save the images, or None to display them instead of saving.
    save_path = "./predictions/linemod"
    # Linemod uses a single class named after the Linemod object. For
    # Occlusion the mapping would be {0: "ape", 1: "can", 2: "cat",
    # 3: "driller", 4: "duck", 5: "eggbox", 6: "glue", 7: "holepuncher"}.
    class_to_name = {0: "driller"}
    translation_scale_norm = 1000.0
    draw_bbox_2d = True
    draw_name = True
    max_images = 100

    # For the Linemod and Occlusion trained models take this camera matrix
    # and these 3D models. For a model trained on a custom dataset, take the
    # camera matrix and 3D cuboids from that dataset instead.
    camera_matrix = get_linemod_camera_matrix()
    name_to_3d_bboxes = get_linemod_3d_bboxes()
    class_to_3d_bboxes = {
        class_idx: name_to_3d_bboxes[name]
        for class_idx, name in class_to_name.items()
    }
    num_classes = len(class_to_name)

    if not os.path.exists(path_to_images):
        print("Error: the given path to the images {} does not exist!".format(path_to_images))
        return

    # Match the extension as a suffix (not a substring) and sort, so that the
    # subset kept by the max_images cut is deterministic.
    image_list = sorted(
        filename
        for filename in os.listdir(path_to_images)
        if filename.endswith(image_extension)
    )[:max_images]
    print("\nInfo: found {} image files".format(len(image_list)))

    # ---- build model and load weights -------------------------------------
    model = EfficientPoseBackbone_WMSA(
        compound_coef=compound_coef,
        num_classes=num_classes,
        num_anchors=9,
        freeze_bn=True,
    )
    state_dict = torch.load(path_to_weights, map_location='cpu')
    # strict=False tolerates key mismatches (the original note says the class
    # count changed); report them instead of hiding them.
    load_result = model.load_state_dict(state_dict, strict=False)
    missing_keys = getattr(load_result, "missing_keys", [])
    unexpected_keys = getattr(load_result, "unexpected_keys", [])
    if missing_keys:
        print("Warning: weights missing for keys: {}".format(missing_keys))
    if unexpected_keys:
        print("Warning: unexpected keys in weights: {}".format(unexpected_keys))

    model = ModelWithFilterDet(model)
    model.requires_grad_(False)
    model.eval()

    device = torch.device("cuda" if use_cuda else "cpu")
    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()
    image_dtype = torch.float16 if use_float16 else torch.float32

    # ---- load and preprocess images ---------------------------------------
    image_size = model.model.input_sizes[compound_coef]  # type: ignore
    image_inputs = []
    camera_inputs = []
    scale_batch_list = []
    img_org_path_list = []
    loaded_filenames = []

    print("load images...")
    for image_filename in tqdm(image_list):
        image_path = os.path.join(path_to_images, image_filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for unreadable files; skip them rather
            # than crash inside the preprocessing.
            print("Warning: could not read image {}, skipping".format(image_path))
            continue

        input_list, scale = preprocess_pose(
            image, image_size, camera_matrix, translation_scale_norm
        )
        image_inputs.append(input_list[0])
        camera_inputs.append(input_list[1])
        img_org_path_list.append(image_path)
        loaded_filenames.append(image_filename)
        # Original author's note: this scale does not appear to be used by
        # the post-processing; the anchors are generated inside the network's
        # forward pass, so rescaling afterwards may be unnecessary.
        scale_batch_list.append(scale)

    # ---- profiler configuration -------------------------------------------
    # Profile CUDA only when the model actually runs on the GPU, and sort the
    # table by a column that was recorded.
    activities = [ProfilerActivity.CPU]
    if use_cuda:
        activities.append(ProfilerActivity.CUDA)
    sort_key = "cuda_time_total" if use_cuda else "cpu_time_total"

    if save_path is not None:
        os.makedirs(save_path, exist_ok=True)

    # ---- predict ------------------------------------------------------------
    num_loaded = len(img_org_path_list)
    with torch.no_grad():
        for batch_start in tqdm(range(0, num_loaded, batch_size)):
            # Clamp the end so a short last batch does not index past the lists.
            batch_end = min(batch_start + batch_size, num_loaded)

            input_img_batch = _stack_inputs(image_inputs[batch_start:batch_end], device)
            input_cam_batch = _stack_inputs(camera_inputs[batch_start:batch_end], device)
            input_img_batch = input_img_batch.to(image_dtype).permute(0, 3, 1, 2)

            with profile(activities=activities,
                         record_shapes=True,
                         profile_memory=True) as prof:
                with record_function("model_inference"):
                    (batch_boxes, batch_scores, batch_labels,
                     batch_rotations, batch_translations) = model(
                        (input_img_batch, input_cam_batch))
            print(prof.key_averages().table(sort_by=sort_key, row_limit=10))

            for idx_j in range(batch_start, batch_end):
                # Keep the batch outputs under their own names so one image's
                # post-processed result is not fed back in for the next image.
                # NOTE(review): the whole batch output is passed, as in the
                # original; with batch_size > 1 this presumably needs
                # per-image slicing - confirm against postprocess_pose_org.
                boxes, scores, labels, rotations, translations = postprocess_pose_org(
                    batch_boxes, batch_scores, batch_labels,
                    batch_rotations, batch_translations,
                    scale=scale_batch_list[idx_j],
                    score_threshold=score_threshold)

                org_image = cv2.imread(img_org_path_list[idx_j])
                draw_detections(org_image,
                                boxes,
                                scores,
                                labels,
                                rotations,
                                translations,
                                class_to_bbox_3D=class_to_3d_bboxes,
                                camera_matrix=camera_matrix,
                                label_to_name=class_to_name,
                                draw_bbox_2d=draw_bbox_2d,
                                draw_name=draw_name)

                if save_path is None:
                    # display image with predictions
                    cv2.imshow('image with predictions', org_image)
                    cv2.waitKey(0)
                else:
                    # Insert "_predicted" before the extension; only the
                    # trailing extension is touched.
                    filename = loaded_filenames[idx_j]
                    stem = filename[:-len(image_extension)]
                    out_name = stem + "_predicted" + image_extension
                    cv2.imwrite(os.path.join(save_path, out_name), org_image)


if __name__ == '__main__':
    main()