# eval.py (forked from feiyuhuahuo/Yolact_minimal)
from data.coco import COCODetection, COCO_LABEL_MAP
from modules.build_yolact import Yolact
from utils.augmentations import BaseTransform
from utils.functions import MovingAverage, ProgressBar
from utils.box_utils import bbox_iou, mask_iou
from utils import timer
from utils.output_utils import after_nms, NMS
import pycocotools.mask  # the mask submodule provides the RLE encoding used in Make_json.add_mask
from data.config import cfg, set_cfg
import numpy as np
import torch
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
import argparse
import json
import os
from terminaltables import AsciiTable
from collections import OrderedDict
parser = argparse.ArgumentParser(description='YOLACT COCO Evaluation')
parser.add_argument('--config', default=None, help='The config object of the model.')
parser.add_argument('--trained_model', default='weights/yolact_base_54_800000.pth', type=str)
parser.add_argument('--visual_top_k', default=5, type=int, help='Further restrict the number of predictions to parse')
parser.add_argument('--traditional_nms', default=False, action='store_true', help='Whether to use traditional nms.')
parser.add_argument('--max_num', default=-1, type=int, help='The maximum number of images for test, set to -1 for all.')
parser.add_argument('--cocoapi', action='store_true', help='Whether to use cocoapi to evaluate results.')
parser.add_argument('--benchmark', default=False, action='store_true', help='Only run a speed benchmark, without computing mAP.')
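# Example invocations (paths are illustrative):
#   python eval.py --trained_model=weights/yolact_base_54_800000.pth
#   python eval.py --trained_model=weights/yolact_base_54_800000.pth --cocoapi    # also dump COCO-style json results
#   python eval.py --trained_model=weights/yolact_base_54_800000.pth --benchmark  # speed test only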
def prep_benchmark(dets_out, h, w):
with timer.env('After NMS'):
results = after_nms(dets_out, w, h)
with timer.env('Copy'):
classes, scores, boxes, masks = [x[:args.visual_top_k].cpu().numpy() for x in results]
with timer.env('Sync'):
torch.cuda.synchronize()
class Make_json:
def __init__(self):
self.bbox_data = []
self.mask_data = []
self.coco_cats = {}
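# COCO category ids are sparse; COCO_LABEL_MAP maps them to contiguous ids starting at 1.
# Subtracting 1 gives the 0-based class index used by the model, and coco_cats inverts this
# so the json output uses the original COCO category ids.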
for coco_id, real_id in COCO_LABEL_MAP.items():
class_id = real_id - 1
self.coco_cats[class_id] = coco_id
def add_bbox(self, image_id: int, category_id: int, bbox: list, score: float):
""" Note that bbox should be a list or tuple of (x1, y1, x2, y2) """
bbox = [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]]
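# e.g. (x1, y1, x2, y2) = (10, 20, 110, 70) becomes [10, 20, 100, 50] in COCO's [x, y, width, height] format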
# Round to the nearest tenth to avoid huge file sizes, as COCO suggests
bbox = [round(float(x) * 10) / 10 for x in bbox]
self.bbox_data.append({'image_id': int(image_id),
'category_id': self.coco_cats[int(category_id)],
'bbox': bbox,
'score': float(score)})
def add_mask(self, image_id: int, category_id: int, segmentation: np.ndarray, score: float):
""" The segmentation should be the full mask, the size of the image and with size [h, w]. """
rle = pycocotools.mask.encode(np.asfortranarray(segmentation.astype(np.uint8)))
rle['counts'] = rle['counts'].decode('ascii') # json.dump doesn't like bytes strings
self.mask_data.append({'image_id': int(image_id),
'category_id': self.coco_cats[int(category_id)],
'segmentation': rle,
'score': float(score)})
def dump(self):
dump_arguments = [(self.bbox_data, f'{json_path}/bbox_detections.json'),
(self.mask_data, f'{json_path}/mask_detections.json')]
for data, path in dump_arguments:
with open(path, 'w') as f:
json.dump(data, f)
class APDataObject:
"""Stores all the information necessary to calculate the AP for one IoU and one class."""
def __init__(self):
self.data_points = []
self.num_gt_positives = 0
def push(self, score: float, is_true: bool):
self.data_points.append((score, is_true))
def add_gt_positives(self, num_positives: int):
""" Call this once per image. """
self.num_gt_positives += num_positives
def is_empty(self) -> bool:
return len(self.data_points) == 0 and self.num_gt_positives == 0
def get_ap(self) -> float:
""" Warning: result not cached. """
if self.num_gt_positives == 0:
return 0
# Sort descending by score
self.data_points.sort(key=lambda x: -x[0])
precisions = []
recalls = []
num_true = 0
num_false = 0
# Compute the precision-recall curve. The x axis is recalls and the y axis precisions.
for datum in self.data_points:
# datum[1] is whether the detection is a true or false positive
if datum[1]:
num_true += 1
else:
num_false += 1
precision = num_true / (num_true + num_false)
recall = num_true / self.num_gt_positives
precisions.append(precision)
recalls.append(recall)
# Smooth the curve by computing [max(precisions[i:]) for i in range(len(precisions))],
# i.e. replace each precision with the highest precision at any greater-or-equal recall.
# This removes temporary dips from the curve and matches what COCOEval does.
for i in range(len(precisions) - 1, 0, -1):
if precisions[i] > precisions[i - 1]:
precisions[i - 1] = precisions[i]
# Compute the integral of precision(recall) d_recall from recall=0 to 1 using a fixed-width Riemann sum with 101 bars.
y_range = [0] * 101 # idx 0 is recall == 0.0 and idx 100 is recall == 1.00
x_range = np.array([x / 100 for x in range(101)])
recalls = np.array(recalls)
# np.searchsorted with side='left' finds, for each x in x_range, the index of the first recall >= x;
# the (smoothed) precision at that index is used as precision(x).
# The integral is approximated this way because that's how COCOEval does it.
indices = np.searchsorted(recalls, x_range, side='left')
for bar_idx, precision_idx in enumerate(indices):
if precision_idx < len(precisions):
y_range[bar_idx] = precisions[precision_idx]
# Finally compute the riemann sum to get our integral.
# avg([precision(x) for x in 0:0.01:1])
return sum(y_range) / len(y_range)
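# Illustrative example (not executed): with 2 ground-truth positives and detections pushed as
# (0.9, True), (0.8, False), (0.6, True), the precision/recall pairs are
# (1.0, 0.5), (0.5, 0.5), (0.667, 1.0). Smoothing raises the middle precision to 0.667,
# and the 101-point average gives (51 * 1.0 + 50 * 0.667) / 101 ~= 0.835.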
def prep_metrics(ap_data, nms_outs, gt, gt_masks, h, w, num_crowd, image_id, make_json, cocoapi):
""" Returns a list of APs for this image, with each element being for a class """
with timer.env('After NMS'):
class_ids, classes, boxes, masks = after_nms(nms_outs, h, w)
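# Note: 'classes' here holds the per-detection confidence scores; they are passed as the
# 'score' argument to add_bbox/add_mask and to ap_obj.push below.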
if class_ids.size(0) == 0:
return
class_ids = list(class_ids.cpu().numpy().astype(int))
classes = list(classes.cpu().numpy().astype(float))
masks = masks.view(-1, h * w).cuda()
boxes = boxes.cuda()
if cocoapi:
with timer.env('Output json'):
boxes = boxes.cpu().numpy()
masks = masks.view(-1, h, w).cpu().numpy()
for i in range(masks.shape[0]):
# Make sure that the bounding box actually makes sense (has positive area) before writing it out
if (boxes[i, 3] - boxes[i, 1]) * (boxes[i, 2] - boxes[i, 0]) > 0:
make_json.add_bbox(image_id, class_ids[i], boxes[i, :], classes[i])
make_json.add_mask(image_id, class_ids[i], masks[i, :, :], classes[i])
return
with timer.env('Prepare gt'):
gt_boxes = torch.Tensor(gt[:, :4])
gt_boxes[:, [0, 2]] *= w
gt_boxes[:, [1, 3]] *= h
gt_classes = list(gt[:, 4].astype(int))
gt_masks = torch.Tensor(gt_masks).view(-1, h * w)
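# The last num_crowd ground-truth entries are crowd annotations. Split them off so that
# detections matching only a crowd region are ignored rather than counted as false positives
# (see the matched_crowd check in the main loop below).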
if num_crowd > 0:
split = lambda x: (x[-num_crowd:], x[:-num_crowd])
crowd_boxes, gt_boxes = split(gt_boxes)
crowd_masks, gt_masks = split(gt_masks)
crowd_classes, gt_classes = split(gt_classes)
with timer.env('Eval Setup'):
num_pred = len(class_ids)
num_gt = len(gt_classes)
mask_iou_cache = mask_iou(masks, gt_masks)
bbox_iou_cache = bbox_iou(boxes.float(), gt_boxes.float())
if num_crowd > 0:
crowd_mask_iou_cache = mask_iou(masks, crowd_masks, iscrowd=True)
crowd_bbox_iou_cache = bbox_iou(boxes.float(), crowd_boxes.float(), iscrowd=True)
else:
crowd_mask_iou_cache = None
crowd_bbox_iou_cache = None
iou_types = [('box', lambda i, j: bbox_iou_cache[i, j].item(), lambda i, j: crowd_bbox_iou_cache[i, j].item()),
('mask', lambda i, j: mask_iou_cache[i, j].item(), lambda i, j: crowd_mask_iou_cache[i, j].item())]
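# Greedy matching: for each class and IoU threshold, each detection of that class is matched to
# the unused ground-truth box/mask with the highest IoU above the threshold. Matched detections
# are pushed as true positives, the rest as false positives (unless they overlap a crowd region).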
timer.start('Main loop')
for _class in set(class_ids + gt_classes):
num_gt_for_class = sum([1 for x in gt_classes if x == _class])
for iouIdx in range(len(iou_thresholds)):
iou_threshold = iou_thresholds[iouIdx]
for iou_type, iou_func, crowd_func in iou_types:
gt_used = [False] * len(gt_classes)
ap_obj = ap_data[iou_type][iouIdx][_class]
ap_obj.add_gt_positives(num_gt_for_class)
for i in range(num_pred):
if class_ids[i] != _class:
continue
max_iou_found = iou_threshold
max_match_idx = -1
for j in range(num_gt):
if gt_used[j] or gt_classes[j] != _class:
continue
iou = iou_func(i, j)
if iou > max_iou_found:
max_iou_found = iou
max_match_idx = j
if max_match_idx >= 0:
gt_used[max_match_idx] = True
ap_obj.push(classes[i], True)
else:
# If the detection matches a crowd, we can just ignore it
matched_crowd = False
if num_crowd > 0:
for j in range(len(crowd_classes)):
if crowd_classes[j] != _class:
continue
iou = crowd_func(i, j)
if iou > iou_threshold:
matched_crowd = True
break
# All this crowd handling exists so that our eval code gives the same result as COCOEval.
# There aren't that many crowd annotations to begin with, but matching its numbers matters.
if not matched_crowd:
ap_obj.push(classes[i], False)
timer.stop('Main loop')
def calc_map(ap_data):
print('\nCalculating mAP...')
aps = [{'box': [], 'mask': []} for _ in iou_thresholds]
for _class in range(len(cfg.dataset.class_names)):
for iou_idx in range(len(iou_thresholds)):
for iou_type in ('box', 'mask'):
ap_obj = ap_data[iou_type][iou_idx][_class]
if not ap_obj.is_empty():
aps[iou_idx][iou_type].append(ap_obj.get_ap())
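# Build the summary table: one row each for box and mask AP, one column per IoU threshold
# (values in percent), plus an 'all' column holding the mean over the 10 thresholds (COCO-style mAP).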
all_maps = {'box': OrderedDict(), 'mask': OrderedDict()}
for iou_type in ('box', 'mask'):
all_maps[iou_type]['all'] = 0 # Make this first in the ordereddict
for i, threshold in enumerate(iou_thresholds):
mAP = sum(aps[i][iou_type]) / len(aps[i][iou_type]) * 100 if len(aps[i][iou_type]) > 0 else 0
all_maps[iou_type][int(threshold * 100)] = mAP
all_maps[iou_type]['all'] = (sum(all_maps[iou_type].values()) / (len(all_maps[iou_type].values()) - 1))
row1 = list(all_maps['box'].keys())
row1.insert(0, ' ')
row2 = list(all_maps['box'].values())
row2 = [round(aa, 2) for aa in row2]
row2.insert(0, 'box')
row3 = list(all_maps['mask'].values())
row3 = [round(aa, 2) for aa in row3]
row3.insert(0, 'mask')
table = [row1, row2, row3]
table = AsciiTable(table)
return table.table
def evaluate(net, dataset, max_num=-1, during_training=False, benchmark=False, cocoapi=False, traditional_nms=False):
frame_times = MovingAverage()
dataset_size = len(dataset) if max_num < 0 else min(max_num, len(dataset))
dataset_indices = list(range(len(dataset)))
dataset_indices = dataset_indices[:dataset_size]
progress_bar = ProgressBar(40, dataset_size)
if benchmark:
timer.disable('Data loading')
else:
# For each class and iou, stores tuples (score, isPositive)
# Index ap_data[type][iouIdx][classIdx]
ap_data = {'box': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds],
'mask': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds]}
make_json = Make_json()
for i, image_idx in enumerate(dataset_indices):
timer.reset()
with timer.env('Data loading'):
img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx)
batch = Variable(img.unsqueeze(0))
if cuda:
batch = batch.cuda()
with timer.env('Network forward'):
net_outs = net(batch)
nms_outs = NMS(net_outs, traditional_nms)
if benchmark:
prep_benchmark(nms_outs, h, w)
else:
prep_metrics(ap_data, nms_outs, gt, gt_masks, h, w, num_crowd, dataset.ids[image_idx], make_json, cocoapi)
# First couple of images take longer because we're constructing the graph.
# Since that's technically initialization, don't include those in the FPS calculations.
fps = 0
if i > 1 and not during_training:
frame_times.add(timer.total_time())
fps = 1 / frame_times.get_avg()
progress = (i + 1) / dataset_size * 100
progress_bar.set_val(i + 1)
print('\rProcessing: %s %d / %d (%.2f%%) %.2f fps ' % (
repr(progress_bar), i + 1, dataset_size, progress, fps), end='')
if benchmark:
print('\n\nStats for the last frame:')
timer.print_stats()
avg_seconds = frame_times.get_avg()
print('Average: %5.2f fps, %5.2f ms' % (1 / frame_times.get_avg(), 1000 * avg_seconds))
else:
if cocoapi:
make_json.dump()
print(f'\nJson files dumped, saved in: {json_path}.')
return
table = calc_map(ap_data)
print(table)
return table
iou_thresholds = [x / 100 for x in range(50, 100, 5)]
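# COCO's 10 IoU thresholds: 0.50, 0.55, ..., 0.95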
cuda = torch.cuda.is_available()
if __name__ == '__main__':
args = parser.parse_args()
json_path = 'results'
if not os.path.exists(json_path):
os.mkdir(json_path)
if args.config is None:
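# e.g. 'weights/yolact_base_54_800000.pth' -> pieces ['yolact', 'base', '54', '800000.pth'] -> 'yolact_base_config'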
piece = args.trained_model.split('/')[1].split('_')
name = f'{piece[0]}_{piece[1]}_config'
print(f'\nConfig not specified. Parsed \'{name}\' from the checkpoint name.\n')
set_cfg(name)
with torch.no_grad():
if cuda:
cudnn.benchmark = True
cudnn.fastest = True
torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
torch.set_default_tensor_type('torch.FloatTensor')
dataset = COCODetection(cfg.dataset.valid_images, cfg.dataset.valid_info, augmentation=BaseTransform())
print('Loading model...')
net = Yolact()
net.load_weights(args.trained_model)
net.eval()
if cuda:
net = net.cuda()
evaluate(net, dataset, args.max_num, False, args.benchmark, args.cocoapi, args.traditional_nms)