diff --git a/dl_lib/data/datasets/builtin.py b/dl_lib/data/datasets/builtin.py
index 457bce5..d2272e2 100644
--- a/dl_lib/data/datasets/builtin.py
+++ b/dl_lib/data/datasets/builtin.py
@@ -53,6 +53,13 @@
         "coco/annotations/instances_val2017_100.json"),
 }
 
+_PREDEFINED_SPLITS_COCO["multi_metal"] = {
+    "multi_metal_coco_2014_train":
+    ("train2014", "annotations/instances_train2014.json"),
+    "multi_metal_coco_2014_val":
+    ("val2014", "annotations/instances_val2014.json"),
+}
+
 
 def register_all_coco(root=osp.join(
         osp.split(osp.split(dl_lib.__file__)[0])[0], "datasets")):
@@ -87,5 +94,5 @@ def register_all_pascal_voc(root=osp.join(
 
 # Register them all under "./datasets"
-register_all_coco()
+register_all_coco(root=r'E:\dataset\uncompressed')
 register_all_pascal_voc()
diff --git a/dl_lib/data/datasets/builtin_meta.py b/dl_lib/data/datasets/builtin_meta.py
index bb311a7..6fc7eff 100644
--- a/dl_lib/data/datasets/builtin_meta.py
+++ b/dl_lib/data/datasets/builtin_meta.py
@@ -140,6 +140,18 @@
     {"color": [250, 141, 255], "isthing": 0, "id": 200, "name": "rug-merged"},
 ]
 
+MULTI_METAL_COCO_CATEGORIES = [
+    {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "flat"},
+    {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "flat_back"},
+    {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "four_flat"},
+    {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "four_hole"},
+    {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "metal_three"},
+    {"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "metal_three_back"},
+    {"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "one_hole_back"},
+    {"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "one_hole_front"},
+    {"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "two_back"},
+    {"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "two_front"},
+]
 
 
 def _get_coco_instances_meta():
     thing_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 1]
@@ -155,6 +167,20 @@ def _get_coco_instances_meta():
     }
     return ret
 
+
+def _get_multi_metal_coco_instances_meta():
+    thing_ids = [k["id"] for k in MULTI_METAL_COCO_CATEGORIES if k["isthing"] == 1]
+    thing_colors = [k["color"] for k in MULTI_METAL_COCO_CATEGORIES if k["isthing"] == 1]
+    assert len(thing_ids) == 10, len(thing_ids)
+    # Mapping from the non-contiguous category ids to contiguous ids in [0, 9]
+    thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)}
+    thing_classes = [k["name"] for k in MULTI_METAL_COCO_CATEGORIES if k["isthing"] == 1]
+    ret = {
+        "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id,
+        "thing_classes": thing_classes,
+        "thing_colors": thing_colors,
+    }
+    return ret
+
 
 def _get_builtin_metadata(dataset_name):
     if dataset_name == "coco":
@@ -175,4 +201,6 @@ def _get_builtin_metadata(dataset_name):
             "thing_classes": CITYSCAPES_THING_CLASSES,
             "stuff_classes": CITYSCAPES_STUFF_CLASSES,
         }
+    elif dataset_name == "multi_metal":
+        return _get_multi_metal_coco_instances_meta()
     raise KeyError("No built-in metadata for dataset {}".format(dataset_name))
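
Note: a quick sanity check of the registration above, assuming the usual `register_all_coco` path wires the `multi_metal` metadata to these splits (a minimal sketch, not part of the patch):

```python
from dl_lib.data import MetadataCatalog

meta = MetadataCatalog.get("multi_metal_coco_2014_train")
assert meta.thing_classes[0] == "flat"
# category ids 1..10 map onto contiguous training ids 0..9
assert meta.thing_dataset_id_to_contiguous_id[10] == 9
```
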
diff --git a/dl_lib/engine/defaults.py b/dl_lib/engine/defaults.py
index cd323b9..6cb0fb9 100755
--- a/dl_lib/engine/defaults.py
+++ b/dl_lib/engine/defaults.py
@@ -12,6 +12,7 @@
 import argparse
 import logging
 import os
+import sys
 from collections import OrderedDict
 
 import torch
@@ -66,7 +67,7 @@ def default_argument_parser():
     # PyTorch still may leave orphan processes in multi-gpu training.
     # Therefore we use a deterministic way to obtain port,
     # so that users are aware of orphan processes by seeing the port occupied.
-    port = 2 ** 15 + 2 ** 14 + hash(os.getuid()) % 2 ** 14
+    port = 2 ** 15 + 2 ** 14 + hash(1 if sys.platform == "win32" else os.getuid()) % 2 ** 14
     parser.add_argument("--dist-url", default="tcp://127.0.0.1:{}".format(port))
     parser.add_argument(
         "opts",
diff --git a/dl_lib/evaluation/coco_evaluation.py b/dl_lib/evaluation/coco_evaluation.py
index 7371bd5..2dc96a6 100755
--- a/dl_lib/evaluation/coco_evaluation.py
+++ b/dl_lib/evaluation/coco_evaluation.py
@@ -88,7 +88,8 @@ def _tasks_from_config(self, cfg):
         tasks = ("bbox",)
         if cfg.MODEL.MASK_ON:
-            tasks = tasks + ("segm",)
+            # tasks = tasks + ("segm",)
+            pass
         if cfg.MODEL.KEYPOINT_ON:
             tasks = tasks + ("keypoints",)
         return tasks
diff --git a/dl_lib/evaluation/evaluator.py b/dl_lib/evaluation/evaluator.py
index 15f44f6..2e6dabf 100755
--- a/dl_lib/evaluation/evaluator.py
+++ b/dl_lib/evaluation/evaluator.py
@@ -101,7 +101,8 @@ def inference_on_dataset(model, data_loader, evaluator):
     Returns:
         The return value of `evaluator.evaluate()`
     """
-    num_devices = torch.distributed.get_world_size() if torch.distributed.is_initialized() else 1
+    # num_devices = torch.distributed.get_world_size() if torch.distributed.is_initialized() else 1
+    num_devices = 1
     logger = logging.getLogger(__name__)
     logger.info("Start inference on {} images".format(len(data_loader)))
@@ -120,6 +121,7 @@ def inference_on_dataset(model, data_loader, evaluator):
 
             start_compute_time = time.time()
             outputs = model(inputs)
+            draw_result(inputs, outputs)
             if torch.cuda.is_available():
                 torch.cuda.synchronize()
             total_compute_time += time.time() - start_compute_time
@@ -160,6 +162,19 @@ def inference_on_dataset(model, data_loader, evaluator):
         results = {}
     return results
 
+
+def draw_result(inputs, outputs):
+    import cv2
+    for input, output in zip(inputs, outputs):
+        file_name = input['file_name']
+        image = cv2.imread(file_name)
+        pred_segmentation = output['instances'].get('pred_segmentation')
+        pred_bbox = output['instances'].get('pred_boxes').tensor
+        for segmentation, bbox in zip(pred_segmentation, pred_bbox):
+            for idx in range(0, segmentation.shape[0], 2):
+                # filled dot; cv2 requires int coordinates and a non-zero thickness
+                cv2.circle(image, (int(segmentation[idx]), int(segmentation[idx + 1])), 2, (0, 255, 0), -1)
+            cv2.rectangle(image, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2)
+        cv2.imwrite(r'D:\project\COCO_MetalMulti\result\result.jpg', image)
+
 
 @contextmanager
 def inference_context(model):
diff --git a/dl_lib/layers/ROIAlign/ROIAlign_cuda.cu b/dl_lib/layers/ROIAlign/ROIAlign_cuda.cu
index 3f49edc..26702d1 100644
--- a/dl_lib/layers/ROIAlign/ROIAlign_cuda.cu
+++ b/dl_lib/layers/ROIAlign/ROIAlign_cuda.cu
@@ -307,6 +307,10 @@ __global__ void RoIAlignBackwardFeature(
 
 namespace dl_lib {
 
+int ceil_div(int a, int b) {
+  return (a + b - 1) / b;
+}
+
 at::Tensor ROIAlign_forward_cuda(
     const at::Tensor& input,
     const at::Tensor& rois,
@@ -334,7 +338,9 @@ at::Tensor ROIAlign_forward_cuda(
   auto output_size = num_rois * pooled_height * pooled_width * channels;
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();
 
-  dim3 grid(std::min(at::cuda::ATenCeilDiv(output_size, 512L), 4096L));
+  dim3 grid(std::min(at::cuda::ATenCeilDiv(static_cast<int64_t>(output_size),
+                                           static_cast<int64_t>(512)),
+                     static_cast<int64_t>(4096)));
   dim3 block(512);
 
   if (output.numel() == 0) {
@@ -390,7 +396,9 @@ at::Tensor ROIAlign_backward_cuda(
 
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();
 
-  dim3 grid(std::min(at::cuda::ATenCeilDiv(grad.numel(), 512L), 4096L));
+  dim3 grid(std::min(at::cuda::ATenCeilDiv(static_cast<int64_t>(grad.numel()),
+                                           static_cast<int64_t>(512)),
+                     static_cast<int64_t>(4096)));
   dim3 block(512);
 
   // handle possibly empty gradients
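
Note: the `static_cast<int64_t>` casts exist because the `512L`/`4096L` literals are 32-bit on Windows and no longer match `ATenCeilDiv`'s `int64_t` overload. The grid computation itself is plain ceil-division capped at 4096 blocks, the same arithmetic as the new `ceil_div` helper, in Python terms:

```python
def ceil_div(a: int, b: int) -> int:
    return (a + b - 1) // b

# one 512-thread block per 512 outputs, never more than 4096 blocks
assert ceil_div(1000, 512) == 2
assert min(ceil_div(10_000_000, 512), 4096) == 4096
```
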
diff --git a/dl_lib/network/centernet.py b/dl_lib/network/centernet.py
index f7b36b3..8c84b24 100644
--- a/dl_lib/network/centernet.py
+++ b/dl_lib/network/centernet.py
@@ -10,6 +10,8 @@
 from .generator import CenterNetDecoder, CenterNetGT
 from .loss import modified_focal_loss, reg_l1_loss
 
+import matplotlib.pyplot as plt
+
 
 class CenterNet(nn.Module):
     """
@@ -73,9 +75,9 @@ def forward(self, batched_inputs):
 
         gt_dict = self.get_ground_truth(batched_inputs)
 
-        return self.losses(pred_dict, gt_dict)
+        return self.losses(pred_dict, gt_dict, images)
 
-    def losses(self, pred_dict, gt_dict):
+    def losses(self, pred_dict, gt_dict, images):
         r"""
         calculate losses of pred and gt
 
@@ -107,19 +109,37 @@ def losses(self, pred_dict, gt_dict):
         index = gt_dict['index']
         index = index.to(torch.long)
         # width and height loss, better version
-        loss_wh = reg_l1_loss(pred_dict['wh'], mask, index, gt_dict['wh'])
+        loss_wh, _, _ = reg_l1_loss(pred_dict['wh'], mask, index, gt_dict['wh'])
 
         # regression loss
-        loss_reg = reg_l1_loss(pred_dict['reg'], mask, index, gt_dict['reg'])
+        loss_reg, _, _ = reg_l1_loss(pred_dict['reg'], mask, index, gt_dict['reg'])
+        loss_segmentation_x, pred_x_s, gt_x_s = reg_l1_loss(pred_dict['segmentation_x'], mask, index, gt_dict['segmentation_x'])
+        loss_segmentation_y, pred_y_s, gt_y_s = reg_l1_loss(pred_dict['segmentation_y'], mask, index, gt_dict['segmentation_y'])
+
+        # debug visualization: plot predicted vs. ground-truth contour points on
+        # the first image of the batch (note: plt.show() blocks every iteration)
+        for pred_x, gt_x, pred_y, gt_y in zip(pred_x_s[0], gt_x_s[0], pred_y_s[0], gt_y_s[0]):
+            pred_x = pred_x.cpu().data.numpy() * 512
+            gt_x = gt_x.cpu().data.numpy() * 512
+            pred_y = pred_y.cpu().data.numpy() * 512
+            gt_y = gt_y.cpu().data.numpy() * 512
+            # plt.scatter(i[:, 1], i[:, 0], color='b')
+            plt.imshow(np.transpose(images[0].cpu().data.numpy(), (1, 2, 0)))
+            plt.scatter(gt_x, gt_y, color='g')
+            plt.scatter(pred_x, pred_y, color='r')
+            plt.show()
 
         loss_cls *= self.cfg.MODEL.LOSS.CLS_WEIGHT
         loss_wh *= self.cfg.MODEL.LOSS.WH_WEIGHT
         loss_reg *= self.cfg.MODEL.LOSS.REG_WEIGHT
+        loss_segmentation_x *= self.cfg.MODEL.LOSS.SEG_WEIGHT
+        loss_segmentation_y *= self.cfg.MODEL.LOSS.SEG_WEIGHT
 
         loss = {
             "loss_cls": loss_cls,
             "loss_box_wh": loss_wh,
             "loss_center_reg": loss_reg,
+            "loss_segmentation_x": loss_segmentation_x,
+            "loss_segmentation_y": loss_segmentation_y,
         }
         # print(loss)
         return loss
@@ -168,8 +188,12 @@ def decode_prediction(self, pred_dict, img_info):
         fmap = pred_dict["cls"]
         reg = pred_dict["reg"]
         wh = pred_dict["wh"]
+        segmentation_x = pred_dict["segmentation_x"] if 'segmentation_x' in pred_dict else None
+        segmentation_y = pred_dict["segmentation_y"] if 'segmentation_y' in pred_dict else None
+        segmentation = (segmentation_x, segmentation_y) if segmentation_x is not None and segmentation_y is not None \
+            else None
 
-        boxes, scores, classes = CenterNetDecoder.decode(fmap, wh, reg)
+        boxes, scores, classes, segmentation = CenterNetDecoder.decode(fmap, wh, reg, segmentation=segmentation)
         # boxes = Boxes(boxes.reshape(boxes.shape[-2:]))
         scores = scores.reshape(-1)
         classes = classes.reshape(-1).to(torch.int64)
@@ -177,7 +201,8 @@ def decode_prediction(self, pred_dict, img_info):
         # dets = CenterNetDecoder.decode(fmap, wh, reg)
         boxes = CenterNetDecoder.transform_boxes(boxes, img_info)
         boxes = Boxes(boxes)
-        return dict(pred_boxes=boxes, scores=scores, pred_classes=classes)
+        segmentation = CenterNetDecoder.transform_segmentation(segmentation, img_info)
+        return dict(pred_boxes=boxes, scores=scores, pred_classes=classes, pred_segmentation=segmentation)
 
     def preprocess_image(self, batched_inputs):
         """
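
Note: the `segmentation` tuple handed to `CenterNetDecoder.decode` carries one per-axis map each; the decoder then weaves the two back into a single `[x0, y0, x1, y1, ...]` tensor. A toy illustration of that interleaving:

```python
import torch

xs = torch.tensor([[[0.1, 0.2, 0.3, 0.4]]])  # (batch, K objects, points)
ys = torch.tensor([[[0.5, 0.6, 0.7, 0.8]]])
seg = torch.zeros(1, 1, 8)
seg[:, :, 0::2] = xs
seg[:, :, 1::2] = ys
# seg[0, 0] == [0.1, 0.5, 0.2, 0.6, 0.3, 0.7, 0.4, 0.8]
```
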
diff --git a/dl_lib/network/generator/centernet_decode.py b/dl_lib/network/generator/centernet_decode.py
index 0369290..28b5413 100644
--- a/dl_lib/network/generator/centernet_decode.py
+++ b/dl_lib/network/generator/centernet_decode.py
@@ -14,7 +14,7 @@ class CenterNetDecoder(object):
 
     @staticmethod
-    def decode(fmap, wh, reg=None, cat_spec_wh=False, K=100):
+    def decode(fmap, wh, reg=None, cat_spec_wh=False, K=100, segmentation=None):
         r"""
         decode output feature map to detection results
 
@@ -47,6 +47,16 @@ def decode(fmap, wh, reg=None, cat_spec_wh=False, K=100):
         else:
             wh = wh.reshape(batch, K, 2)
 
+        if segmentation is not None:
+            segmentation_x = gather_feature(segmentation[0], index, use_transform=True)
+            segmentation_y = gather_feature(segmentation[1], index, use_transform=True)
+            batch_size = segmentation_x.shape[0]
+            objects_num = segmentation_x.shape[1]
+            points_num = segmentation_x.shape[2]
+            # interleave x and y into [x0, y0, x1, y1, ...] on the same device as the inputs
+            segmentation = torch.zeros((batch_size, objects_num, points_num * 2), device=segmentation_x.device)
+            segmentation[:, :, 0::2] = segmentation_x
+            segmentation[:, :, 1::2] = segmentation_y
+
         clses = clses.reshape(batch, K, 1).float()
         scores = scores.reshape(batch, K, 1)
 
@@ -55,7 +65,7 @@ def decode(fmap, wh, reg=None, cat_spec_wh=False, K=100):
                             xs + half_w, ys + half_h], dim=2)
 
-        detections = (bboxes, scores, clses)
+        detections = (bboxes, scores, clses, segmentation)
 
         return detections
 
@@ -82,6 +92,29 @@ def transform_boxes(boxes, img_info, scale=1):
         target_boxes = np.dot(aug_coords, trans.T).reshape(-1, 4)
         return target_boxes
 
+    @staticmethod
+    def transform_segmentation(boxes, img_info, scale=1):
+        r"""
+        transform predicted segmentation points back to the original image coordinates
+
+        Args:
+            boxes(Tensor): torch Tensor with (Batch, N, 8) shape, i.e. 4 interleaved (x, y) points
+            img_info(dict): dict contains all information of original image
+            scale(float): used for multiscale testing
+        """
+        boxes = boxes.cpu().numpy().reshape(-1, 8)
+
+        center = img_info['center']
+        size = img_info['size']
+        output_size = (img_info['width'], img_info['height'])
+        src, dst = CenterAffine.generate_src_and_dst(center, size, output_size)
+        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
+
+        coords = boxes.reshape(-1, 2)
+        aug_coords = np.column_stack((coords, np.ones(coords.shape[0])))
+        target_segmentation = np.dot(aug_coords, trans.T).reshape(-1, 8)
+        return target_segmentation
+
     @staticmethod
     def pseudo_nms(fmap, pool_size=3):
         r"""
@@ -107,13 +140,13 @@ def topk_score(scores, K=40):
         topk_scores, topk_inds = torch.topk(scores.reshape(batch, channel, -1), K)
 
         topk_inds = topk_inds % (height * width)
-        topk_ys = (topk_inds / width).int().float()
+        topk_ys = (topk_inds.true_divide(width)).int().float()
        topk_xs = (topk_inds % width).int().float()
 
         # get all topk in in a batch
         topk_score, index = torch.topk(topk_scores.reshape(batch, -1), K)
         # div by K because index is grouped by K(C x K shape)
-        topk_clses = (index / K).int()
+        topk_clses = (index.true_divide(K)).int()
         topk_inds = gather_feature(topk_inds.view(batch, -1, 1), index).reshape(batch, K)
         topk_ys = gather_feature(topk_ys.reshape(batch, -1, 1), index).reshape(batch, K)
         topk_xs = gather_feature(topk_xs.reshape(batch, -1, 1), index).reshape(batch, K)
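
Note: the `true_divide` changes keep the old floor semantics while avoiding the integer-division deprecation PyTorch 1.5+ raises for `/` on integer tensors; the indices here are non-negative, so truncation via `.int()` and floor agree:

```python
import torch

topk_inds = torch.tensor([0, 130, 260])
width = 128
rows = topk_inds.true_divide(width).int()  # tensor([0, 1, 2], dtype=torch.int32)
cols = (topk_inds % width).int()           # tensor([0, 2, 4], dtype=torch.int32)
```
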
diff --git a/dl_lib/network/generator/centernet_gt.py b/dl_lib/network/generator/centernet_gt.py
index 60b172e..66a46a1 100644
--- a/dl_lib/network/generator/centernet_gt.py
+++ b/dl_lib/network/generator/centernet_gt.py
@@ -15,8 +15,11 @@ def generate(config, batched_input):
         output_size = config.INPUT.OUTPUT_SIZE
         min_overlap = config.MODEL.CENTERNET.MIN_OVERLAP
         tensor_dim = config.MODEL.CENTERNET.TENSOR_DIM
+        num_polygons_points = config.MODEL.CENTERNET.NUM_POLYGON_POINTS
 
         scoremap_list, wh_list, reg_list, reg_mask_list, index_list = [[] for i in range(5)]
+        segmentation_list_x = []
+        segmentation_list_y = []
 
         for data in batched_input:
             # img_size = (data['height'], data['width'])
@@ -28,6 +31,8 @@ def generate(config, batched_input):
             gt_reg = torch.zeros_like(gt_wh)
             reg_mask = torch.zeros(tensor_dim)
             gt_index = torch.zeros(tensor_dim)
+            gt_segmentation_x = torch.ones(tensor_dim, num_polygons_points) * -128
+            gt_segmentation_y = torch.ones(tensor_dim, num_polygons_points) * -128
             # pass
 
             boxes, classes = bbox_dict['gt_boxes'], bbox_dict['gt_classes']
@@ -50,11 +55,28 @@ def generate(config, batched_input):
             )
             gt_wh[:num_boxes] = wh
 
+            masks = bbox_dict['gt_masks']
+            gt_segmentation_x[:num_boxes], gt_segmentation_y[:num_boxes] = \
+                masks.normalized_by_length(box_tensor.numpy(),
+                                           num_polygons_points,
+                                           box_scale)
+            gt_segmentation = torch.zeros(num_boxes, num_polygons_points * 2)
+            gt_segmentation[:, 0::2] = gt_segmentation_x[:num_boxes]
+            gt_segmentation[:, 1::2] = gt_segmentation_y[:num_boxes]
+            # debug visualization: dump the resized image with its gt boxes
+            import cv2
+            image = data['image'].numpy().transpose((1, 2, 0))
+            image = cv2.resize(image, (128, 128))
+            for bbox in box_tensor:
+                cv2.rectangle(image, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2)
+            cv2.imwrite(r'D:\project\COCO_MetalMulti\result\result.jpg', image)
+
             scoremap_list.append(gt_scoremap)
             wh_list.append(gt_wh)
             reg_list.append(gt_reg)
             reg_mask_list.append(reg_mask)
             index_list.append(gt_index)
+            segmentation_list_x.append(gt_segmentation_x)
+            segmentation_list_y.append(gt_segmentation_y)
 
         gt_dict = {
             "score_map": torch.stack(scoremap_list, dim=0),
@@ -62,6 +84,8 @@ def generate(config, batched_input):
             "reg": torch.stack(reg_list, dim=0),
             "reg_mask": torch.stack(reg_mask_list, dim=0),
             "index": torch.stack(index_list, dim=0),
+            "segmentation_x": torch.stack(segmentation_list_x, dim=0),
+            "segmentation_y": torch.stack(segmentation_list_y, dim=0),
         }
         return gt_dict
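
Note: the -128 fill acts as a sentinel. Only the first `num_boxes` rows of the (TENSOR_DIM, NUM_POLYGON_POINTS) target hold real, [0, 1]-normalized coordinates, so padded slots stay trivially separable. A minimal sketch of the pattern:

```python
import torch

tensor_dim, num_points, num_boxes = 128, 4, 2
gt_x = torch.ones(tensor_dim, num_points) * -128
gt_x[:num_boxes] = torch.rand(num_boxes, num_points)  # stand-in for real targets
valid = gt_x[:, 0] != -128
assert valid.sum() == num_boxes
```
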
diff --git a/dl_lib/network/head/centernet_head.py b/dl_lib/network/head/centernet_head.py
index e1f59b7..f4070f9 100644
--- a/dl_lib/network/head/centernet_head.py
+++ b/dl_lib/network/head/centernet_head.py
@@ -36,15 +36,50 @@ def __init__(self, cfg):
         )
         self.wh_head = SingleHead(64, 2)
         self.reg_head = SingleHead(64, 2)
+        self.segmentation_head_x = SegHead(num_polygon_points=cfg.MODEL.CENTERNET.NUM_POLYGON_POINTS)
+        self.segmentation_head_y = SegHead(num_polygon_points=cfg.MODEL.CENTERNET.NUM_POLYGON_POINTS)
 
     def forward(self, x):
         cls = self.cls_head(x)
         cls = torch.sigmoid(cls)
         wh = self.wh_head(x)
         reg = self.reg_head(x)
+        segmentation_x = self.segmentation_head_x(x)
+        segmentation_y = self.segmentation_head_y(x)
         pred = {
             'cls': cls,
             'wh': wh,
-            'reg': reg
+            'reg': reg,
+            'segmentation_x': segmentation_x,
+            'segmentation_y': segmentation_y
         }
         return pred
+
+
+class SegHead(nn.Module):
+    def __init__(self, num_convs=2, in_channels=64, conv_out_channels=64, conv_kernel_size=3, num_polygon_points=4):
+        super(SegHead, self).__init__()
+        self.num_convs = num_convs
+        self.in_channels = in_channels
+        self.conv_out_channels = conv_out_channels
+        self.conv_kernel_size = conv_kernel_size
+        self.relu = nn.ReLU(inplace=True)
+        self.out_conv = nn.Conv2d(conv_out_channels, num_polygon_points, 1)
+
+        self.convs = nn.ModuleList()
+        for i in range(self.num_convs):
+            in_channels = (
+                self.in_channels if i == 0 else self.conv_out_channels)
+            padding = (self.conv_kernel_size - 1) // 2
+            self.convs.append(
+                nn.Conv2d(
+                    in_channels,
+                    self.conv_out_channels,
+                    self.conv_kernel_size,
+                    padding=padding))
+
+    def forward(self, x):
+        for conv in self.convs:
+            x = conv(x)
+            x = self.relu(x)
+        x = self.out_conv(x)
+        return x
diff --git a/dl_lib/network/loss/reg_l1_loss.py b/dl_lib/network/loss/reg_l1_loss.py
index dee718e..7e90e3f 100644
--- a/dl_lib/network/loss/reg_l1_loss.py
+++ b/dl_lib/network/loss/reg_l1_loss.py
@@ -11,4 +11,4 @@ def reg_l1_loss(output, mask, index, target):
     # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
     loss = F.l1_loss(pred * mask, target * mask, reduction='sum')
     loss = loss / (mask.sum() + 1e-4)
-    return loss
+    return loss, pred * mask, target * mask
diff --git a/dl_lib/structures/masks.py b/dl_lib/structures/masks.py
index 7b2cb5e..d3a77ce 100755
--- a/dl_lib/structures/masks.py
+++ b/dl_lib/structures/masks.py
@@ -394,6 +394,61 @@ def area(self):
 
         return torch.tensor(area)
 
+    def normalized_by_length(self, bboxes, num_of_target, bbox_scale):
+        if bboxes.shape[0] == 0:
+            return torch.ones(0, num_of_target) * -128, torch.ones(0, num_of_target) * -128
+
+        polygons_x = []
+        polygons_y = []
+        for idx, polygons_per_instance in enumerate(self.polygons):
+            # sum_of_points = sum([int(line.shape[0]/2) for line in polygons_per_instance])
+            # lines_per_polygons_x = []
+            # lines_per_polygons_y = []
+            # bbox = bboxes[idx]
+            # for line in polygons_per_instance:
+            #     num_of_points = int(line.shape[0]/2)
+            #     line *= bbox_scale
+            #     line = np.roll(line, -np.argwhere(line==line[np.argwhere(line[1::2]==line[1::2].min())*2].min()).min())
+            #     normalized_line_x = (line[0::2] - np.tile(bbox[0], num_of_points))
+            #     normalized_line_y = (line[1::2] - np.tile(bbox[1], num_of_points))
+            #     lines_per_polygons_x.append(normalized_line_x)
+            #     lines_per_polygons_y.append(normalized_line_y)
+            # if sum_of_points > num_of_target:
+            #     sample_idx = np.linspace(0, sum_of_points, num=num_of_target, endpoint=False, dtype=np.int32)
+            #     sum_of_points = int(num_of_target)
+            # else:
+            #     sample_idx = np.arange(0, sum_of_points)
+            # target_numpy_x = np.ones(num_of_target) * -128
+            # target_numpy_x[:sum_of_points] = np.concatenate(lines_per_polygons_x)[sample_idx]
+            # target_numpy_y = np.ones(num_of_target) * -128
+            # target_numpy_y[:sum_of_points] = np.concatenate(lines_per_polygons_y)[sample_idx]
+            line = np.concatenate(polygons_per_instance) * bbox_scale
+
+            # extreme points: for each of left/up/right/down, take the extreme
+            # coordinate plus the matching coordinate on the other axis
+            min_in_x = np.squeeze(np.argwhere(line[0::2] == line[0::2].min()))
+            left_in_x = line[0::2].min()
+            left_in_y = line[line == line[min_in_x * 2 + 1].max()].min()
+
+            min_in_y = np.squeeze(np.argwhere(line[1::2] == line[1::2].min()))
+            up_in_x = line[line == line[min_in_y * 2].min()].min()
+            up_in_y = line[1::2].min()
+
+            max_in_x = np.squeeze(np.argwhere(line[0::2] == line[0::2].max()))
+            right_in_x = line[0::2].max()
+            right_in_y = line[line == line[max_in_x * 2 + 1].min()].min()
+
+            max_in_y = np.squeeze(np.argwhere(line[1::2] == line[1::2].max()))
+            down_in_x = line[line == line[max_in_y * 2].max()].min()
+            down_in_y = line[1::2].max()
+
+            target_numpy_x = np.array([left_in_x, up_in_x, right_in_x, down_in_x]) / 512
+            target_numpy_y = np.array([left_in_y, up_in_y, right_in_y, down_in_y]) / 512
+
+            polygons_x.append(target_numpy_x)
+            polygons_y.append(target_numpy_y)
+
+        return torch.tensor(polygons_x), torch.tensor(polygons_y)
+
+
     @staticmethod
     def cat(polymasks_list: List["PolygonMasks"]) -> "PolygonMasks":
         """
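
Note: `normalized_by_length` reduces each instance polygon to its four extreme points (left/up/right/down), scaled by the fixed 512 input size. A simplified version of the same idea, ignoring the tie-breaking the patch applies when several vertices share an extreme coordinate:

```python
import numpy as np

def extreme_points(line: np.ndarray):
    xs, ys = line[0::2], line[1::2]
    return ((xs.min(), ys[xs.argmin()]),   # left-most
            (xs[ys.argmin()], ys.min()),   # top-most
            (xs.max(), ys[xs.argmax()]),   # right-most
            (xs[ys.argmax()], ys.max()))   # bottom-most

poly = np.array([10, 40, 50, 10, 90, 40, 50, 80], dtype=np.float32)  # x0,y0,x1,y1,...
print(extreme_points(poly))  # ((10., 40.), (50., 10.), (90., 40.), (50., 80.))
```
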
""" diff --git a/playground/centernet.res18.coco.512size/config.py b/playground/centernet.res18.coco.512size/config.py index b64e481..091725d 100644 --- a/playground/centernet.res18.coco.512size/config.py +++ b/playground/centernet.res18.coco.512size/config.py @@ -3,12 +3,13 @@ _config_dict = dict( MODEL=dict( - WEIGHTS="", - RESNETS=dict(DEPTH=18), + WEIGHTS=r"D:\git_projects\CenterNet-better\playground\centernet.res18.coco.512size\exp_3\model_final.pth", + MASK_ON=True, + RESNETS=dict(DEPTH=101), PIXEL_MEAN=[0.485, 0.456, 0.406], PIXEL_STD=[0.229, 0.224, 0.225], CENTERNET=dict( - DECONV_CHANNEL=[512, 256, 128, 64], + DECONV_CHANNEL=[2048, 256, 128, 64], DECONV_KERNEL=[4, 4, 4], NUM_CLASSES=80, MODULATE_DEFORM=True, @@ -16,11 +17,13 @@ DOWN_SCALE=4, MIN_OVERLAP=0.7, TENSOR_DIM=128, + NUM_POLYGON_POINTS=4, ), LOSS=dict( CLS_WEIGHT=1, WH_WEIGHT=0.1, REG_WEIGHT=1, + SEG_WEIGHT=2, ), ), INPUT=dict( @@ -52,16 +55,16 @@ SOLVER=dict( OPTIMIZER=dict( NAME="SGD", - BASE_LR=0.02, + BASE_LR=0.002, WEIGHT_DECAY=1e-4, ), LR_SCHEDULER=dict( GAMMA=0.1, STEPS=(81000, 108000), - MAX_ITER=126000, + MAX_ITER=253000, WARMUP_ITERS=1000, ), - IMS_PER_BATCH=128, + IMS_PER_BATCH=4, ), OUTPUT_DIR=osp.join( '/data/Outputs/model_logs/playground', diff --git a/playground/centernet.res18.coco.512size/test_net.py b/playground/centernet.res18.coco.512size/test_net.py new file mode 100644 index 0000000..51722d0 --- /dev/null +++ b/playground/centernet.res18.coco.512size/test_net.py @@ -0,0 +1,164 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Modified by Feng Wang +""" +Detection Training Script. + +This scripts reads a given config file and runs the training or evaluation. +It is an entry point that is made to train standard models in dl_lib. + +In order to let one script support training of many models, +this script contains logic that are specific to these built-in models and therefore +may not be suitable for your own project. +For example, your research project perhaps only needs a single "evaluator". + +Therefore, we recommend you to use dl_lib as an library and take +this file as an example of how to use the library. +You may want to write your own script with your datasets and other customizations. +""" +import glob +import logging +import os +import re +import sys +sys.path.insert(0, '.') # noqa: E402 +from collections import OrderedDict + +import dl_lib.utils.comm as comm +from config import config +from dl_lib.checkpoint import DetectionCheckpointer +from dl_lib.data import MetadataCatalog +from dl_lib.engine import (DefaultTrainer, default_argument_parser, + default_setup, launch) +from dl_lib.evaluation import (COCOEvaluator, DatasetEvaluators, + PascalVOCDetectionEvaluator, verify_results) +from net import build_model + + +class Trainer(DefaultTrainer): + """ + We use the "DefaultTrainer" which contains a number pre-defined logic for + standard training workflow. They may not work for you, especially if you + are working on a new research project. In that case you can use the cleaner + "SimpleTrainer", or write your own training loop. + """ + + @classmethod + def build_evaluator(cls, cfg, dataset_name, output_folder=None): + """ + Create evaluator(s) for a given dataset. + This uses the special metadata "evaluator_type" associated with each builtin dataset. + For your own dataset, you can simply create an evaluator manually in your + script and do not have to worry about the hacky if-else logic here. 
diff --git a/playground/centernet.res18.coco.512size/test_net.py b/playground/centernet.res18.coco.512size/test_net.py
new file mode 100644
index 0000000..51722d0
--- /dev/null
+++ b/playground/centernet.res18.coco.512size/test_net.py
@@ -0,0 +1,164 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+# Modified by Feng Wang
+"""
+Detection Training Script.
+
+This script reads a given config file and runs the training or evaluation.
+It is an entry point that is made to train standard models in dl_lib.
+
+In order to let one script support training of many models,
+this script contains logic that is specific to these built-in models and therefore
+may not be suitable for your own project.
+For example, your research project perhaps only needs a single "evaluator".
+
+Therefore, we recommend you to use dl_lib as a library and take
+this file as an example of how to use the library.
+You may want to write your own script with your datasets and other customizations.
+"""
+import glob
+import logging
+import os
+import re
+import sys
+sys.path.insert(0, '.')  # noqa: E402
+from collections import OrderedDict
+
+import dl_lib.utils.comm as comm
+from config import config
+from dl_lib.checkpoint import DetectionCheckpointer
+from dl_lib.data import MetadataCatalog
+from dl_lib.engine import (DefaultTrainer, default_argument_parser,
+                           default_setup, launch)
+from dl_lib.evaluation import (COCOEvaluator, DatasetEvaluators,
+                               PascalVOCDetectionEvaluator, verify_results)
+from net import build_model
+
+
+class Trainer(DefaultTrainer):
+    """
+    We use the "DefaultTrainer" which contains a number of pre-defined logics for
+    standard training workflow. They may not work for you, especially if you
+    are working on a new research project. In that case you can use the cleaner
+    "SimpleTrainer", or write your own training loop.
+    """
+
+    @classmethod
+    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
+        """
+        Create evaluator(s) for a given dataset.
+        This uses the special metadata "evaluator_type" associated with each builtin dataset.
+        For your own dataset, you can simply create an evaluator manually in your
+        script and do not have to worry about the hacky if-else logic here.
+        """
+        if output_folder is None:
+            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
+        evaluator_list = []
+        evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
+
+        if evaluator_type in ["coco", "coco_panoptic_seg"]:
+            evaluator_list.append(
+                COCOEvaluator(
+                    dataset_name, cfg, True,
+                    output_folder, dump=cfg.GLOBAL.DUMP_TEST
+                ))
+        if evaluator_type == "pascal_voc":
+            return PascalVOCDetectionEvaluator(dataset_name)
+
+        if len(evaluator_list) == 0:
+            raise NotImplementedError(
+                "no Evaluator for the dataset {} with the type {}".format(
+                    dataset_name, evaluator_type
+                )
+            )
+        if len(evaluator_list) == 1:
+            return evaluator_list[0]
+        return DatasetEvaluators(evaluator_list)
+
+    @classmethod
+    def test_with_TTA(cls, cfg, model):
+        logger = logging.getLogger("dl_lib.trainer")
+        # In the end of training, run an evaluation with TTA
+        # Only support some R-CNN models.
+        logger.info("Running inference with test-time augmentation ...")
+        from dl_lib.modeling import GeneralizedRCNNWithTTA
+        model = GeneralizedRCNNWithTTA(cfg, model)
+        evaluators = [
+            cls.build_evaluator(
+                cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA")
+            )
+            for name in cfg.DATASETS.TEST
+        ]
+        res = cls.test(cfg, model, evaluators)
+        res = OrderedDict({k + "_TTA": v for k, v in res.items()})
+        return res
+
+
+def test_argument_parser():
+    parser = default_argument_parser()
+    parser.add_argument("--start-iter", type=int, default=0, help="start iter used to test")
+    parser.add_argument("--end-iter", type=int, default=None,
+                        help="end iter used to test")
+    parser.add_argument("--debug", action="store_true", help="use debug mode or not")
+    return parser
+
+
+def main(args):
+    config.merge_from_list(args.opts)
+    cfg, logger = default_setup(config, args)
+    if args.debug:
+        batches = int(cfg.SOLVER.IMS_PER_BATCH / 8 * args.num_gpus)
+        if cfg.SOLVER.IMS_PER_BATCH != batches:
+            cfg.SOLVER.IMS_PER_BATCH = batches
+            logger.warning("SOLVER.IMS_PER_BATCH is changed to {}".format(batches))
+
+    if "MODEL.WEIGHTS" in args.opts:
+        valid_files = [cfg.MODEL.WEIGHTS]
+    else:
+        list_of_files = glob.glob(os.path.join(cfg.OUTPUT_DIR, '*.pth'))
+        assert list_of_files, "no pth file found in {}".format(cfg.OUTPUT_DIR)
+        list_of_files.sort(key=os.path.getctime)
+        latest_file = list_of_files[-1]
+        if not args.end_iter:
+            valid_files = [latest_file]
+        else:
+            files = [f for f in list_of_files if str(f) <= str(latest_file)]
+            valid_files = []
+            for f in files:
+                try:
+                    model_iter = int(re.split(r'(model_|\.pth)', f)[-3])
+                except Exception:
+                    logger.warning("remove {}".format(f))
+                    continue
+                if args.start_iter <= model_iter <= args.end_iter:
+                    valid_files.append(f)
+            assert valid_files, "No .pth files satisfy your requirement"
+
+    # * means all if need specific format then *.csv
+    for current_file in valid_files:
+        cfg.MODEL.WEIGHTS = current_file
+        model = build_model(cfg)
+
+        DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
+            cfg.MODEL.WEIGHTS, resume=args.resume
+        )
+        res = Trainer.test(cfg, model)
+        if comm.is_main_process():
+            verify_results(cfg, res)
+        if cfg.TEST.AUG.ENABLED:
+            res.update(Trainer.test_with_TTA(cfg, model))
+
+    # return res
+
+
+if __name__ == "__main__":
+    args = test_argument_parser().parse_args()
+    print("Command Line Args:", args)
+    launch(
+        main,
+        args.num_gpus,
+        num_machines=args.num_machines,
+        machine_rank=args.machine_rank,
+        dist_url=args.dist_url,
+        args=(args,),
+    )
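
Note: the iteration filter above leans on `re.split` keeping captured delimiters, so the digits always sit at index -3:

```python
import re

parts = re.split(r'(model_|\.pth)', '/logs/model_0009999.pth')
# ['/logs/', 'model_', '0009999', '.pth', '']
assert int(parts[-3]) == 9999
```
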
diff --git a/playground/centernet.res18.coco.512size/train_net.py b/playground/centernet.res18.coco.512size/train_net.py
new file mode 100644
index 0000000..a144e30
--- /dev/null
+++ b/playground/centernet.res18.coco.512size/train_net.py
@@ -0,0 +1,127 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+# Modified by Feng Wang
+"""
+Detection Training Script.
+
+This script reads a given config file and runs the training or evaluation.
+It is an entry point that is made to train standard models in dl_lib.
+
+In order to let one script support training of many models,
+this script contains logic that is specific to these built-in models and therefore
+may not be suitable for your own project.
+For example, your research project perhaps only needs a single "evaluator".
+
+Therefore, we recommend you to use dl_lib as a library and take
+this file as an example of how to use the library.
+You may want to write your own script with your datasets and other customizations.
+"""
+
+import os
+import sys
+sys.path.insert(0, '.')  # noqa: E402
+
+from colorama import Fore, Style
+
+import dl_lib.utils.comm as comm
+from config import config
+from dl_lib.checkpoint import DetectionCheckpointer
+from dl_lib.data import MetadataCatalog
+from dl_lib.engine import (DefaultTrainer, default_argument_parser,
+                           default_setup, hooks, launch)
+from dl_lib.evaluation import (COCOEvaluator, DatasetEvaluators,
+                               PascalVOCDetectionEvaluator, verify_results)
+from net import build_model
+
+
+class Trainer(DefaultTrainer):
+    """
+    We use the "DefaultTrainer" which contains a number of pre-defined logics for
+    standard training workflow. They may not work for you, especially if you
+    are working on a new research project. In that case you can use the cleaner
+    "SimpleTrainer", or write your own training loop.
+    """
+
+    @classmethod
+    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
+        """
+        Create evaluator(s) for a given dataset.
+        This uses the special metadata "evaluator_type" associated with each builtin dataset.
+        For your own dataset, you can simply create an evaluator manually in your
+        script and do not have to worry about the hacky if-else logic here.
+        """
+        if output_folder is None:
+            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
+        evaluator_list = []
+        evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
+
+        if evaluator_type in ["coco", "coco_panoptic_seg"]:
+            evaluator_list.append(
+                COCOEvaluator(
+                    dataset_name, cfg, True,
+                    output_folder, dump=cfg.GLOBAL.DUMP_TRAIN
+                ))
+        elif evaluator_type == "pascal_voc":
+            return PascalVOCDetectionEvaluator(dataset_name)
+
+        if len(evaluator_list) == 0:
+            raise NotImplementedError(
+                "no Evaluator for the dataset {} with the type {}".format(
+                    dataset_name, evaluator_type
+                )
+            )
+        elif len(evaluator_list) == 1:
+            return evaluator_list[0]
+        return DatasetEvaluators(evaluator_list)
+
+
+def main(args):
+    config.merge_from_list(args.opts)
+    cfg, logger = default_setup(config, args)
+    model = build_model(cfg)
+    logger.info(f"Model structure: {model}")
+    if sys.platform == "linux":
+        file_sys = os.statvfs(cfg.OUTPUT_DIR)
+        free_space_Gb = (file_sys.f_bfree * file_sys.f_frsize) / 2**30
+        # We assume that a single dumped model is 700Mb
+        eval_space_Gb = (cfg.SOLVER.LR_SCHEDULER.MAX_ITER // cfg.SOLVER.CHECKPOINT_PERIOD) * 700 / 2**10
+        if eval_space_Gb > free_space_Gb:
+            logger.warning(f"{Fore.RED}Remaining space({free_space_Gb}GB) "
+                           f"is less than ({eval_space_Gb}GB){Style.RESET_ALL}")
+    if args.eval_only:
+        DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
+            cfg.MODEL.WEIGHTS, resume=args.resume
+        )
+        res = Trainer.test(cfg, model)
+        if comm.is_main_process():
+            verify_results(cfg, res)
+        if cfg.TEST.AUG.ENABLED:
+            res.update(Trainer.test_with_TTA(cfg, model))
+        return res
+
+    """
+    If you'd like to do anything fancier than the standard training logic,
+    consider writing your own training loop or subclassing the trainer.
+    """
+    trainer = Trainer(cfg, model)
+    trainer.resume_or_load(resume=args.resume)
+    if cfg.TEST.AUG.ENABLED:
+        trainer.register_hooks(
+            [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))]
+        )
+
+    return trainer.train()
+
+
+if __name__ == "__main__":
+    args = default_argument_parser().parse_args()
+    print("soft link to {}".format(config.OUTPUT_DIR))
+    config.link_log()
+    print("Command Line Args:", args)
+    launch(
+        main,
+        args.num_gpus,
+        num_machines=args.num_machines,
+        machine_rank=args.machine_rank,
+        dist_url=args.dist_url,
+        args=(args,),
+    )
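
Note: the free-space guard is Linux-only because `os.statvfs` does not exist on Windows; `shutil.disk_usage` would be the portable equivalent. The estimate itself, under the script's ~700 MB-per-checkpoint assumption (the CHECKPOINT_PERIOD below is illustrative):

```python
import shutil

def needed_space_gb(max_iter: int, ckpt_period: int, model_mb: float = 700) -> float:
    return (max_iter // ckpt_period) * model_mb / 2**10

free_gb = shutil.disk_usage(".").free / 2**30  # portable, unlike os.statvfs
print(f"{needed_space_gb(253000, 10000):.1f} GB needed, {free_gb:.1f} GB free")
```
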
diff --git a/setup.py b/setup.py
index ed5fe5b..d3e3a62 100644
--- a/setup.py
+++ b/setup.py
@@ -4,6 +4,7 @@
 
 import glob
 import os
+import sys
 
 import torch
 from setuptools import find_packages, setup
@@ -12,6 +13,7 @@
 torch_ver = [int(x) for x in torch.__version__.split(".")[:2]]
 assert torch_ver >= [1, 3], "Requires PyTorch >= 1.3"
 
+os_name = sys.platform
 
 def get_extensions():
     this_dir = os.path.dirname(os.path.abspath(__file__))
@@ -39,6 +41,8 @@ def get_extensions():
             "-D__CUDA_NO_HALF_CONVERSIONS__",
             "-D__CUDA_NO_HALF2_OPERATORS__",
         ]
+        if sys.platform == 'win32':
+            extra_compile_args["nvcc"].append("-D _WIN64")
 
         # It's better if pytorch can do this by default ..
         CC = os.environ.get("CC", None)
@@ -61,13 +65,28 @@ def get_extensions():
 
 cur_dir = os.getcwd()
 
-with open("tools/dl_train", "w") as dl_lib_train:
+
+if os_name == "win32":
+    dl_train_name = "tools/dl_train.bat"
+    dl_test_name = "tools/dl_test.bat"
+    head = f"set OMP_NUM_THREADS=1\n"
+    python_command = "python"
+    parameters = "%*"
+elif os_name == "linux":
+    dl_train_name = "tools/dl_train"
+    dl_test_name = "tools/dl_test"
     head = f"#!/bin/bash\n\nexport OMP_NUM_THREADS=1\n"
+    python_command = "python3"
+    parameters = "$@"
+else:
+    raise Exception("Target OS not supported")
+
+with open(dl_train_name, "w") as dl_lib_train:
     dl_lib_train.write(
-        head + f"python3 {os.path.join(cur_dir, 'tools', 'train_net.py')} $@")
-with open("tools/dl_test", "w") as dl_lib_test:
+        head + f"{python_command} {os.path.join(cur_dir, 'tools', 'train_net.py')} {parameters}")
+with open(dl_test_name, "w") as dl_lib_test:
     dl_lib_test.write(
-        head + f"python3 {os.path.join(cur_dir, 'tools', 'test_net.py')} $@")
+        head + f"{python_command} {os.path.join(cur_dir, 'tools', 'test_net.py')} {parameters}")
 
 setup(
     name="dl_lib",
@@ -95,5 +114,6 @@ def get_extensions():
     extras_require={"all": ["shapely", "psutil"]},
     ext_modules=get_extensions(),
     cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
-    scripts=["tools/dl_train", "tools/dl_test"],
+    scripts=["tools/dl_train", "tools/dl_test"] if os_name == 'linux'
+    else ["tools/dl_train.bat", "tools/dl_test.bat"],
 )
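
Note: for clarity, what the generated launcher ends up containing; sketched here by running the win32 branch by hand (the checkout path is illustrative):

```python
import os

head = "set OMP_NUM_THREADS=1\n"
cur_dir = r"C:\work\CenterNet-better"  # illustrative checkout path
print(head + f"python {os.path.join(cur_dir, 'tools', 'train_net.py')} %*")
# on Windows this prints:
#   set OMP_NUM_THREADS=1
#   python C:\work\CenterNet-better\tools\train_net.py %*
```
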
diff --git a/tools/train_net.py b/tools/train_net.py
index 87d6f2e..a144e30 100644
--- a/tools/train_net.py
+++ b/tools/train_net.py
@@ -79,13 +79,14 @@ def main(args):
     cfg, logger = default_setup(config, args)
     model = build_model(cfg)
     logger.info(f"Model structure: {model}")
-    file_sys = os.statvfs(cfg.OUTPUT_DIR)
-    free_space_Gb = (file_sys.f_bfree * file_sys.f_frsize) / 2**30
-    # We assume that a single dumped model is 700Mb
-    eval_space_Gb = (cfg.SOLVER.LR_SCHEDULER.MAX_ITER // cfg.SOLVER.CHECKPOINT_PERIOD) * 700 / 2**10
-    if eval_space_Gb > free_space_Gb:
-        logger.warning(f"{Fore.RED}Remaining space({free_space_Gb}GB) "
-                       f"is less than ({eval_space_Gb}GB){Style.RESET_ALL}")
+    if sys.platform == "linux":
+        file_sys = os.statvfs(cfg.OUTPUT_DIR)
+        free_space_Gb = (file_sys.f_bfree * file_sys.f_frsize) / 2**30
+        # We assume that a single dumped model is 700Mb
+        eval_space_Gb = (cfg.SOLVER.LR_SCHEDULER.MAX_ITER // cfg.SOLVER.CHECKPOINT_PERIOD) * 700 / 2**10
+        if eval_space_Gb > free_space_Gb:
+            logger.warning(f"{Fore.RED}Remaining space({free_space_Gb}GB) "
+                           f"is less than ({eval_space_Gb}GB){Style.RESET_ALL}")
     if args.eval_only:
         DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
             cfg.MODEL.WEIGHTS, resume=args.resume