diff --git a/configs/ssd/bash_arch.py b/configs/ssd/bash_arch.py deleted file mode 100644 index 11711d55..00000000 --- a/configs/ssd/bash_arch.py +++ /dev/null @@ -1,182 +0,0 @@ -_base_ = [ - '../_base_/default_runtime_det.py', -] # model settings - -default_scope = 'mmdet' - -# ========================Suggested optional parameters======================== -# MODEL -num_classes = 71 - -# TRAIN -# dataset settings -dataset_type = 'sscma.CustomCocoDataset' -# dataset link: https://universe.roboflow.com/team-roboflow/coco-128 -data_root = 'https://universe.roboflow.com/ds/z5UOcgxZzD?key=bwx9LQUT0t' - -train_ann = 'train/_annotations.coco.json' -train_data = 'train/' -val_ann = 'valid/_annotations.coco.json' -val_data = 'valid/' - -height = 300 -width = 300 -imgsz = (width, height) -batch = 16 -workers = 4 -val_batch = batch -val_workers = workers - -# TRAIN -lr = 0.001 -epochs = 300 - -weight_decay = 0.0005 -momentum = 0.9 - -# ================================END================================= - - -model = dict( - type='SingleStageDetector', - data_preprocessor=dict( - type='DetDataPreprocessor', mean=[0.0, 0.0, 0.0], std=[255.0, 255.0, 255.0], bgr_to_rgb=True, pad_size_divisor=1 - ), - backbone=dict( - type='SSDVGG', - depth=16, - with_last_pool=False, - ceil_mode=True, - out_indices=(3, 4), - out_feature_indices=(22, 34), - init_cfg=dict(type='Pretrained', checkpoint='open-mmlab://vgg16_caffe'), - ), - neck=dict( - type='SSDNeck', - in_channels=(512, 1024), - out_channels=(512, 1024, 512, 256, 256, 256), - level_strides=(2, 2, 1, 1), - level_paddings=(1, 1, 0, 0), - l2_norm_scale=20, - ), - bbox_head=dict( - type='SSDHead', - in_channels=(512, 1024, 512, 256, 256, 256), - num_classes=num_classes, - anchor_generator=dict( - type='SSDAnchorGenerator', - scale_major=False, - input_size=height, - basesize_ratio_range=(0.15, 0.9), - strides=[8, 16, 32, 64, 100, 300], - ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]], - ), - bbox_coder=dict(type='DeltaXYWHBBoxCoder', target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[0.1, 0.1, 0.2, 0.2]), - ), - # model training and testing settings - train_cfg=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.0, - ignore_iof_thr=-1, - gt_max_assign_all=False, - ), - sampler=dict(type='PseudoSampler'), - smoothl1_beta=1.0, - allowed_border=-1, - pos_weight=-1, - neg_pos_ratio=3, - debug=False, - ), - test_cfg=dict( - nms_pre=1000, nms=dict(type='nms', iou_threshold=0.45), min_bbox_size=0, score_thr=0.02, max_per_img=200 - ), -) -cudnn_benchmark = True - -backend_args = None -train_pipeline = [ - dict(type='LoadImageFromFile', backend_args=backend_args), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Expand', mean=[0.0, 0.0, 0.0], to_rgb=True, ratio_range=(1, 4)), - dict(type='MinIoURandomCrop', min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3), - dict(type='Resize', scale=imgsz, keep_ratio=False), - dict(type='RandomFlip', prob=0.5), - dict( - type='PhotoMetricDistortion', - brightness_delta=32, - contrast_range=(0.5, 1.5), - saturation_range=(0.5, 1.5), - hue_delta=18, - ), - dict(type='PackDetInputs'), -] -test_pipeline = [ - dict(type='LoadImageFromFile', backend_args=backend_args), - dict(type='Resize', scale=imgsz, keep_ratio=False), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor')), -] -train_dataloader = dict( - batch_size=batch, - num_workers=workers, - batch_sampler=None, - 
sampler=dict(type='DefaultSampler', shuffle=True, round_up=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file=train_ann, - data_prefix=dict(img=train_data), - filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline, - backend_args=backend_args, - ), -) - -val_dataloader = dict( - batch_size=val_batch, - num_workers=val_workers, - persistent_workers=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file=val_ann, - data_prefix=dict(img=val_data), - test_mode=True, - pipeline=test_pipeline, - backend_args=backend_args, - ), -) -test_dataloader = val_dataloader - -val_evaluator = dict( - type='CocoMetric', - ann_file=data_root + val_ann, - metric='bbox', - format_only=False, - backend_args=backend_args, -) -test_evaluator = val_evaluator - - -# optimizer -optim_wrapper = dict( - type='OptimWrapper', optimizer=dict(type='SGD', lr=lr, momentum=momentum, weight_decay=weight_decay) -) - -custom_hooks = [ - dict(type='NumClassCheckHook'), - dict(type='CheckInvalidLossHook', interval=50, priority='VERY_LOW'), -] - -vis_backends = [dict(type='LocalVisBackend'), dict(type='TensorboardVisBackend')] -visualizer = dict(type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') - -# NOTE: `auto_scale_lr` is for automatically scaling LR, -# USER SHOULD NOT CHANGE ITS VALUES. -# base_batch_size = (8 GPUs) x (8 samples per GPU) -auto_scale_lr = dict(base_batch_size=batch) diff --git a/configs/yolov5/base_arch.py b/configs/swift_yolo/base_arch.py similarity index 100% rename from configs/yolov5/base_arch.py rename to configs/swift_yolo/base_arch.py diff --git a/configs/yolov5/yolov5_s_1xb16_300e_coco.py b/configs/swift_yolo/swift_yolo_1xb16_300e_coco.py similarity index 84% rename from configs/yolov5/yolov5_s_1xb16_300e_coco.py rename to configs/swift_yolo/swift_yolo_1xb16_300e_coco.py index 971fc301..2226a8a8 100644 --- a/configs/yolov5/yolov5_s_1xb16_300e_coco.py +++ b/configs/swift_yolo/swift_yolo_1xb16_300e_coco.py @@ -4,7 +4,7 @@ # MODEL num_classes = 71 deepen_factor = 0.33 -widen_factor = 0.5 +widen_factor = 0.15 # DATA dataset_type = 'sscma.CustomYOLOv5CocoDataset' @@ -17,12 +17,11 @@ data_root = 'https://universe.roboflow.com/ds/z5UOcgxZzD?key=bwx9LQUT0t' height = 640 width = 640 -imgsz = (width, height) batch = 16 workers = 2 val_batch = batch val_workers = workers - +imgsz = (width, height) # TRAIN persistent_workers = True @@ -86,8 +85,22 @@ dict(type='LoadAnnotations', with_bbox=True), ] +# from mmyolo.datasets.transforms import YOLOv5RandomAffine + +color_space = [ + [dict(type='mmdet.ColorTransform')], + [dict(type='mmdet.AutoContrast')], + [dict(type='mmdet.Equalize')], + [dict(type='mmdet.Sharpness')], + [dict(type='mmdet.Posterize')], + [dict(type='mmdet.Solarize')], + [dict(type='mmdet.Color')], + [dict(type='mmdet.Contrast')], + [dict(type='mmdet.Brightness')], +] train_pipeline = [ - *pre_transform, + dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict(type='LoadAnnotations', with_bbox=True), dict(type='Mosaic', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform), dict( type='YOLOv5RandomAffine', @@ -104,6 +117,13 @@ bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_bboxes_labels', 'gt_ignore_flags']), keymap={'img': 'image', 'gt_bboxes': 'bboxes'}, ), + dict( + type='mmdet.RandomOrder', + transforms=[ + dict(type='mmdet.RandAugment', aug_space=color_space, aug_num=1), + # 
dict(type='mmdet.RandAugment', aug_space=geometric, aug_num=1),
+        ],
+    ),
     dict(type='YOLOv5HSVRandomAug'),
     dict(type='mmdet.RandomFlip', prob=0.5),
     dict(
diff --git a/configs/swift_yolo/swift_yolo_labelmatch_coco.py b/configs/swift_yolo/swift_yolo_labelmatch_coco.py
new file mode 100644
index 00000000..450bf570
--- /dev/null
+++ b/configs/swift_yolo/swift_yolo_labelmatch_coco.py
@@ -0,0 +1,429 @@
+_base_ = ['../_base_/default_runtime_det.py']
+default_scope = 'mmyolo'
+
+# ========================Suggested optional parameters========================
+# DATA
+# Dataset type; this will be used to define the dataset
+# dataset_type = 'mmdet.CocoDataset'
+supdataset_type = 'sscma.CustomYOLOv5CocoDataset'
+unsupdataset_type = 'sscma.UnsupDataset'
+# unsupdataset_type='sscma.CustomYOLOv5CocoDataset'
+# Path to the dataset's root directory
+# dataset link: https://universe.roboflow.com/team-roboflow/coco-128
+data_root = 'https://universe.roboflow.com/ds/z5UOcgxZzD?key=bwx9LQUT0t'
+
+# Path of train annotation file
+# train_ann = 'train/_annotations.coco.json'
+train_ann = 'annotations/train_sup1.json'
+# Prefix of train image path
+train_data = 'train2017/'
+# Path of val annotation file
+# val_ann = 'valid/_annotations.coco.json'
+val_ann = 'annotations/instances_val2017.json'
+# Prefix of val image path
+val_data = 'val2017/'
+# Height of the model input data
+height = 640
+# Width of the model input data
+width = 640
+# The width and height of the model input data
+imgsz = (width, height)  # width, height
+
+# MODEL
+# The scaling factor that controls the depth of the network structure
+deepen_factor = 0.33
+# The scaling factor that controls the width of the network structure
+widen_factor = 0.5
+# Number of classes for classification
+num_classes = 80
+
+# TRAIN
+# Base learning rate for optim_wrapper. Corresponds to a total batch size of 8xb16=128
+lr = 0.01
+# Total number of epochs for model training
+epochs = 500
+# Number of input samples per iteration in the model training phase
+batch = 32
+# Number of threads used to load data during training; this value should be adjusted according to the training batch size
+workers = 1
+# Optimizer weight decay value
+weight_decay = 0.0005
+# SGD momentum/Adam beta1
+momentum = 0.937
+# Learning rate scaling factor
+lr_factor = 0.01
+# persistent_workers must be False if num_workers is 0
+persistent_workers = True
+
+# VAL
+# Batch size of a single GPU during validation
+val_batch = 16
+# Number of workers to pre-fetch data for each GPU during validation
+val_workers = 2
+# Validation interval in epochs (checkpoints are saved at the same interval)
+val_interval = 1
+# Model weight saving interval in epochs
+save_interval = val_interval
+# The maximum number of checkpoints to keep.
+max_keep_ckpts = 3
+# ================================END=================================
+
+model_test_cfg = dict(
+    # The config of multi-label for multi-class prediction.
+    multi_label=True,
+    # The number of boxes before NMS
+    nms_pre=30000,
+    score_thr=0.1,  # Threshold to filter out boxes.
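+    # (Assumption, based on the usual mmyolo post-processing order: boxes under
+    # score_thr are dropped first, NMS runs next, then max_per_img caps the output.)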
+    nms=dict(type='nms', iou_threshold=0.65),  # NMS type and threshold
+    max_per_img=300,  # Max number of detections of each image
+)
+
+
+# Strides of multi-scale prior box
+strides = [8, 16, 32]
+num_det_layers = 3  # The number of model output scales
+norm_cfg = dict(type='BN', momentum=0.03, eps=0.001)  # Normalization config
+
+anchors = [
+    [(10, 13), (16, 30), (33, 23)],  # P3/8
+    [(30, 61), (62, 45), (59, 119)],  # P4/16
+    [(116, 90), (156, 198), (373, 326)],  # P5/32
+]
+
+# -----train val related-----
+affine_scale = 0.5  # YOLOv5RandomAffine scaling ratio
+loss_cls_weight = 0.5
+loss_bbox_weight = 0.05
+loss_obj_weight = 1.0
+prior_match_thr = 4.0  # Prior box matching threshold
+# The obj loss weights of the three output layers
+obj_level_weights = [4.0, 1.0, 0.4]
+
+# Single-scale training is recommended to
+# be turned on, which can speed up training.
+env_cfg = dict(cudnn_benchmark=True)
+
+# model arch
+detector = dict(
+    type='mmyolo.YOLODetector',
+    data_preprocessor=dict(
+        type='mmdet.DetDataPreprocessor', mean=[0.0, 0.0, 0.0], std=[255.0, 255.0, 255.0], bgr_to_rgb=True
+    ),
+    backbone=dict(
+        type='YOLOv5CSPDarknet',
+        deepen_factor=deepen_factor,
+        widen_factor=widen_factor,
+        norm_cfg=norm_cfg,
+        act_cfg=dict(type='ReLU', inplace=True),
+    ),
+    neck=dict(
+        type='YOLOv5PAFPN',
+        deepen_factor=deepen_factor,
+        widen_factor=widen_factor,
+        in_channels=[256, 512, 1024],
+        out_channels=[256, 512, 1024],
+        num_csp_blocks=3,
+        norm_cfg=norm_cfg,
+        act_cfg=dict(type='ReLU', inplace=True),
+    ),
+    bbox_head=dict(
+        type='sscma.YOLOV5Head',
+        head_module=dict(
+            type='sscma.DetHead',
+            num_classes=num_classes,
+            in_channels=[256, 512, 1024],
+            widen_factor=widen_factor,
+            featmap_strides=strides,
+            num_base_priors=3,
+        ),
+        prior_generator=dict(type='mmdet.YOLOAnchorGenerator', base_sizes=anchors, strides=strides),
+        # scaled based on number of detection layers
+        loss_cls=dict(type='mmdet.CrossEntropyLoss', use_sigmoid=True, reduction='mean', loss_weight=loss_cls_weight),
+        loss_bbox=dict(
+            type='IoULoss',
+            iou_mode='ciou',
+            bbox_format='xywh',
+            eps=1e-7,
+            reduction='mean',
+            loss_weight=loss_bbox_weight,
+            return_iou=True,
+        ),
+        loss_obj=dict(type='mmdet.CrossEntropyLoss', use_sigmoid=True, reduction='mean', loss_weight=loss_obj_weight),
+        prior_match_thr=prior_match_thr,
+        obj_level_weights=obj_level_weights,
+    ),
+    test_cfg=model_test_cfg,
+    # _delete_=True
+)
+model = dict(
+    type='sscma.BaseSsod',
+    detector=detector,
+    pseudo_label_cfg=dict(
+        type='sscma.LabelMatch',
+        # cfg=dict(
+        #     multi_label=False,
+        #     conf_thres=0.1,
+        #     iou_thres=0.65,
+        #     ignore_thres_high=0.6,
+        #     ignore_thres_low=0.1,
+        #     resample_high_percent=0.25,
+        #     resample_low_percent=0.99,
+        #     data_names=('person',),
+        #     data_np=0,
+        # ),
+        # target_data_len=10,
+        # label_num_per_img=10,
+        # nc=80,
+    ),
+    teacher_loss_weight=0,
+    # da_loss_weight=0,
+    data_preprocessor=dict(type='mmdet.MultiBranchDataPreprocessor', data_preprocessor=detector['data_preprocessor']),
+    # data_preprocessor=dict(
+    #     type='mmdet.DetDataPreprocessor', mean=[0.0, 0.0, 0.0], std=[255.0, 255.0, 255.0], bgr_to_rgb=True
+    # ),
+    semi_train_cfg=dict(
+        freeze_teacher=True,
+        sup_weight=1.0,
+        unsup_weight=4.0,
+        pseudo_label_initial_score_thr=0.5,
+        rpn_pseudo_thr=0.9,
+        cls_pseudo_thr=0.9,
+        reg_pseudo_thr=0.02,
+        jitter_times=10,
+        jitter_scale=0.06,
+    ),
+    semi_test_cfg=dict(predict_on='teacher'),
+    # _delete_=True
+)
+
+color_space = [
+    [dict(type='mmdet.ColorTransform')],
+    [dict(type='mmdet.AutoContrast')],
+    [dict(type='mmdet.Equalize')],
+    [dict(type='mmdet.Sharpness')],
+    [dict(type='mmdet.Posterize')],
+    [dict(type='mmdet.Solarize')],
+    [dict(type='mmdet.Color')],
+    [dict(type='mmdet.Contrast')],
+    [dict(type='mmdet.Brightness')],
+]
+
+# geometric = [
+#     [dict(type='mmdet.Rotate')],
+#     [dict(type='mmdet.ShearX')],
+#     [dict(type='mmdet.ShearY')],
+#     [dict(type='mmdet.TranslateX')],
+#     [dict(type='mmdet.TranslateY')],
+# ]
+pre_transform = [
+    dict(type='LoadImageFromFile', file_client_args=None),
+    dict(type='LoadAnnotations', with_bbox=True),
+]
+
+
+test_pipeline = [
+    dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
+    dict(type='YOLOv5KeepRatioResize', scale=imgsz),
+    dict(type='LetterResize', scale=imgsz, allow_scale_up=False, pad_val=dict(img=114)),
+    dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'),
+    dict(
+        type='mmdet.PackDetInputs',
+        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'pad_param'),
+    ),
+]
+
+# pipeline used to augment unlabeled data weakly,
+# which will be sent to teacher model for predicting pseudo instances.
+weak_pipeline = [
+    # dict(type='Mosaic', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform),
+    dict(type='LetterResize', scale=imgsz, allow_scale_up=False, pad_val=dict(img=114)),
+    dict(type='mmdet.RandomFlip', prob=0.5),
+    dict(
+        type='mmdet.PackDetInputs',
+        meta_keys=(
+            'img_id',
+            'img_path',
+            'ori_shape',
+            'img_shape',
+            'scale_factor',
+            'flip',
+            'flip_direction',
+            'homography_matrix',
+        ),
+    ),
+]
+# pipeline used to augment unlabeled data strongly,
+# which will be sent to student model for unsupervised training.
+strong_pipeline = [
+    # dict(type='Mosaic', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform),
+    dict(type='LetterResize', scale=imgsz, allow_scale_up=False, pad_val=dict(img=114)),
+    # dict(type='mmdet.RandomFlip', prob=0.5),
+    dict(
+        type='mmdet.RandomOrder',
+        transforms=[
+            dict(type='mmdet.RandAugment', aug_space=color_space, aug_num=1),
+            # dict(type='mmdet.RandAugment', aug_space=geometric, aug_num=1),
+        ],
+    ),
+    dict(type='mmdet.RandomErasing', n_patches=(1, 5), ratio=(0, 0.2)),
+    dict(type='mmdet.FilterAnnotations', min_gt_bbox_wh=(1e-2, 1e-2)),
+    dict(
+        type='mmdet.PackDetInputs',
+        meta_keys=(
+            'img_id',
+            'img_path',
+            'ori_shape',
+            'img_shape',
+            'scale_factor',
+            # 'flip',
+            # 'flip_direction',
+            'homography_matrix',
+        ),
+    ),
+]
+sup_branch_field = ['sup', 'unsup_teacher', 'unsup_student']
+unsup_branch_field = ['sup', 'unsup_teacher', 'unsup_student']
+# pipeline used to augment labeled data,
+# which will be sent to student model for supervised training.
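+# A minimal sketch (comments only, not part of the config) of how the
+# mmdet.MultiBranch transforms below route a single sample, assuming the usual
+# mean-teacher semantics of the 'unsup_teacher'/'unsup_student' branches:
+#
+#   results = pre_transform(raw_sample)        # image and annotations loaded
+#   teacher_view = weak_pipeline(results)      # weak aug -> pseudo-label source
+#   student_view = strong_pipeline(results)    # strong aug -> unsupervised loss
+#
+# MultiBranch packs the views into one dict keyed by branch name, which
+# mmdet.MultiBranchDataPreprocessor later unpacks for sscma.BaseSsod.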
+sup_pipeline = [
+    *pre_transform,
+    dict(type='Mosaic', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform),
+    dict(type='LetterResize', scale=imgsz, allow_scale_up=False, pad_val=dict(img=114)),
+    # dict(type='mmdet.RandomResize', scale=imgsz, keep_ratio=True),
+    dict(type='mmdet.RandomFlip', prob=0.5),
+    dict(type='mmdet.RandAugment', aug_space=color_space, aug_num=1),
+    dict(type='mmdet.FilterAnnotations', min_gt_bbox_wh=(1e-2, 1e-2)),
+    dict(type='mmdet.MultiBranch', branch_field=sup_branch_field, sup=dict(type='mmdet.PackDetInputs')),
+]
+# pipeline used to augment unlabeled data into different views
+unsup_pipeline = [
+    *pre_transform,
+    dict(
+        type='mmdet.MultiBranch',
+        branch_field=unsup_branch_field,
+        unsup_teacher=weak_pipeline,
+        unsup_student=strong_pipeline,
+    ),
+]
+albu_train_transforms = [
+    dict(type='Blur', p=0.01),
+    dict(type='MedianBlur', p=0.01),
+    dict(type='ToGray', p=0.01),
+    dict(type='CLAHE', p=0.01),
+]
+train_pipeline = [
+    *pre_transform,
+    dict(type='Mosaic', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform),
+    dict(
+        type='YOLOv5RandomAffine',
+        max_rotate_degree=0.0,
+        max_shear_degree=0.0,
+        scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
+        # imgsz is (width, height)
+        border=(-imgsz[0] // 2, -imgsz[1] // 2),
+        border_val=(114, 114, 114),
+    ),
+    dict(
+        type='mmdet.Albu',
+        transforms=albu_train_transforms,
+        bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
+        keymap={'img': 'image', 'gt_bboxes': 'bboxes'},
+    ),
+    dict(type='YOLOv5HSVRandomAug'),
+    dict(type='mmdet.RandomFlip', prob=0.5),
+    dict(
+        type='mmdet.MultiBranch',
+        branch_field=sup_branch_field,
+        sup=dict(
+            type='mmdet.PackDetInputs',
+            meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction'),
+        ),
+    ),
+]
+
+labeled_dataset = dict(
+    type=supdataset_type,
+    data_root=data_root,
+    ann_file=train_ann,
+    data_prefix=dict(img=train_data),
+    filter_cfg=dict(filter_empty_gt=False, min_size=32),
+    pipeline=train_pipeline,
+)
+
+unlabeled_dataset = dict(
+    type=unsupdataset_type,
+    data_root=data_root,
+    ann_file=val_ann,
+    data_prefix=dict(img='unlabel_data'),
+    filter_cfg=dict(filter_empty_gt=False),
+    pipeline=unsup_pipeline,
+)
+
+
+train_dataloader = dict(
+    batch_size=batch,
+    num_workers=workers,
+    persistent_workers=True,
+    sampler=dict(
+        # type='mmdet.GroupMultiSourceSampler',
+        type='sscma.SemiSampler',
+        batch_size=batch,
+        sample_ratio=[1, 4],
+        round_up=True,
+    ),
+    dataset=dict(type='sscma.SemiDataset', sup_dataset=labeled_dataset, unsup_dataset=unlabeled_dataset),
+)
+
+val_dataloader = dict(
+    batch_size=val_batch,
+    num_workers=val_workers,
+    persistent_workers=persistent_workers,
+    pin_memory=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=supdataset_type,
+        data_root=data_root,
+        test_mode=True,
+        data_prefix=dict(img=val_data),
+        ann_file=val_ann,
+        pipeline=test_pipeline,
+        # batch_shapes_cfg=None,
+    ),
+)
+
+test_dataloader = val_dataloader
+
+param_scheduler = None
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(
+        type='SGD', lr=lr, momentum=momentum, weight_decay=weight_decay, nesterov=True, batch_size_per_gpu=batch
+    ),
+    constructor='YOLOv5OptimizerConstructor',
+)
+
+default_hooks = dict(
+    param_scheduler=dict(
+        type='YOLOv5ParamSchedulerHook', scheduler_type='linear', lr_factor=lr_factor, max_epochs=epochs
+    ),
+    checkpoint=dict(type='CheckpointHook', interval=val_interval, save_best='auto', max_keep_ckpts=max_keep_ckpts),
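+    # save_best='auto' tracks the first metric reported by the evaluator
+    # (with CocoMetric this is typically coco/bbox_mAP); an assumption based on
+    # MMEngine's CheckpointHook behavior.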
+)
+
+
+custom_hooks = [
+    dict(
+        type='EMAHook', ema_type='ExpMomentumEMA', momentum=0.0001, update_buffers=True, strict_load=False, priority=49
+    ),
+    dict(type='mmdet.MeanTeacherHook'),
+    dict(type='sscma.SemiHook', bure_epoch=200),
+    dict(type='sscma.LabelMatchHook', priority=100),
+]
+
+
+val_evaluator = dict(type='mmdet.CocoMetric', proposal_nums=(100, 1, 10), ann_file=data_root + val_ann, metric='bbox')
+test_evaluator = val_evaluator
+
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=epochs, val_interval=val_interval, _delete_=True)
+val_cfg = dict(type='sscma.SemiValLoop', bure_epoch=200)
+test_cfg = val_cfg
diff --git a/configs/yolov5/yolov5_mb2_1xb16_300e_coco.py b/configs/swift_yolo/swift_yolo_mb2_1xb16_300e_coco.py
similarity index 100%
rename from configs/yolov5/yolov5_mb2_1xb16_300e_coco.py
rename to configs/swift_yolo/swift_yolo_mb2_1xb16_300e_coco.py
diff --git a/configs/yolov5/yolov5_l_1xb16_300e_coco.py b/configs/swift_yolo/swift_yolo_shuff_1xb16_300e_coco.py
similarity index 87%
rename from configs/yolov5/yolov5_l_1xb16_300e_coco.py
rename to configs/swift_yolo/swift_yolo_shuff_1xb16_300e_coco.py
index 6b86b48f..f43deef6 100644
--- a/configs/yolov5/yolov5_l_1xb16_300e_coco.py
+++ b/configs/swift_yolo/swift_yolo_shuff_1xb16_300e_coco.py
@@ -15,8 +15,8 @@
 # dataset link: https://universe.roboflow.com/team-roboflow/coco-128
 data_root = 'https://universe.roboflow.com/ds/z5UOcgxZzD?key=bwx9LQUT0t'
-height = 640
-width = 640
+height = 320
+width = 320
 batch = 16
 workers = 2
 val_batch = batch
@@ -31,7 +31,7 @@
 # DATA
 affine_scale = 0.5
 # MODEL
-strides = [8, 16, 32]
+strides = [8, 16, 21]
 
 anchors = [
     [(10, 13), (16, 30), (33, 23)],  # P3/8
@@ -42,19 +42,29 @@
 model = dict(
     type='mmyolo.YOLODetector',
     backbone=dict(
-        type='YOLOv5CSPDarknet',
-        deepen_factor=deepen_factor,
-        widen_factor=widen_factor,
+        type='FastShuffleNetV2',
+        stage_repeats=[4, 8, 4],
+        stage_out_channels=[-1, 24, 48, 96, 192],
+        init_cfg=dict(
+            type='Pretrained',
+            checkpoint='https://files.seeedstudio.com/sscma/model_zoo/backbone/fastshufllenet2_sha1_90be6b843860adcc72555d8699dafaf99624bddd.pth',
+        ),
+        _delete_=True,
+    ),
     neck=dict(
         type='YOLOv5PAFPN',
         deepen_factor=deepen_factor,
         widen_factor=widen_factor,
+        in_channels=[48, 96, 192],
+        out_channels=[48, 96, 192],
+        num_csp_blocks=1,
+        # norm_cfg=norm_cfg,
+        act_cfg=dict(type='ReLU', inplace=True),
     ),
     bbox_head=dict(
         head_module=dict(
             num_classes=num_classes,
-            in_channels=[256, 512, 1024],
+            in_channels=[48, 96, 192],
             widen_factor=widen_factor,
         ),
     ),
diff --git a/configs/yolov5/yolov5_tiny_1xb16_300e_coco.py b/configs/swift_yolo/swift_yolo_tiny_1xb16_300e_coco.py
similarity index 100%
rename from configs/yolov5/yolov5_tiny_1xb16_300e_coco.py
rename to configs/swift_yolo/swift_yolo_tiny_1xb16_300e_coco.py
diff --git a/configs/yolov3/README.md b/configs/yolov3/README.md
deleted file mode 100644
index d5eda83f..00000000
--- a/configs/yolov3/README.md
+++ /dev/null
@@ -1,38 +0,0 @@
-# YOLOv3
-
-> [YOLOv3: An Incremental Improvement](https://arxiv.org/abs/1804.02767)
-
-
-
-## Abstract
-
-We present some updates to YOLO! We made a bunch of little design changes to make it better. We also trained this new network that's pretty swell. It's a little bigger than last time but more accurate. It's still fast though, don't worry. At 320x320 YOLOv3 runs in 22 ms at 28.2 mAP, as accurate as SSD but three times faster. When we look at the old .5 IOU mAP detection metric YOLOv3 is quite good.
It achieves 57.9 mAP@50 in 51 ms on a Titan X, compared to 57.5 mAP@50 in 198 ms by RetinaNet, similar performance but 3.8x faster. - -
- -
- -## Results and Models - -| Backbone | Scale | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | -| :---------: | :---: | :-----: | :------: | :------------: | :----: | :----------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| MobileNetV2 | 416 | 300e | 5.3 | | 24.1 | [config](./yolov3_mbv2_416_coco.py) | [model](https://github.com/Seeed-Studio/edgelab/releases/download/model_zoo/yolov3_mbv2_416_coco.pth)| - -Notice: We reduce the number of channels to 96 in both head and neck. It can reduce the flops and parameters, which makes these models more suitable for edge devices. - -## Credit - -This implementation originates from the project of Haoyu Wu(@wuhy08) at Western Digital. - -## Citation - -```latex -@misc{redmon2018yolov3, - title={YOLOv3: An Incremental Improvement}, - author={Joseph Redmon and Ali Farhadi}, - year={2018}, - eprint={1804.02767}, - archivePrefix={arXiv}, - primaryClass={cs.CV} -} -``` diff --git a/configs/yolov3/yolov3_mbv2_416_voc.py b/configs/yolov3/yolov3_mbv2_416_voc.py deleted file mode 100644 index 54cb78d2..00000000 --- a/configs/yolov3/yolov3_mbv2_416_voc.py +++ /dev/null @@ -1,165 +0,0 @@ -_base_ = '../_base_/default_runtime_det.py' -default_scope = 'mmdet' - -# ========================Suggested optional parameters======================== -# MODEL -num_classes = 71 - -# TRAIN -# dataset settings -dataset_type = 'sscma.CustomCocoDataset' -# dataset link: https://universe.roboflow.com/team-roboflow/coco-128 -data_root = 'https://universe.roboflow.com/ds/z5UOcgxZzD?key=bwx9LQUT0t' - -train_ann = 'train/_annotations.coco.json' -train_data = 'train/' -val_ann = 'valid/_annotations.coco.json' -val_data = 'valid/' - -height = 320 -width = 320 -imgsz = (width, height) - - -# TRAIN -lr = 0.001 -epochs = 300 -batch = 32 -workers = 2 -val_batch=1 -val_workers=1 - -weight_decay = 0.0005 -momentum = 0.9 - -# ================================END================================= -# model settings - -data_preprocessor = dict( - type='DetDataPreprocessor', mean=[0, 0, 0], std=[255.0, 255.0, 255.0], bgr_to_rgb=True, pad_size_divisor=32 -) -model = dict( - type='YOLOV3', - data_preprocessor=data_preprocessor, - backbone=dict( - type='Darknet', - depth=53, - out_indices=(3, 4, 5), - # init_cfg=dict(type='Pretrained', checkpoint='open-mmlab://darknet53') - ), - neck=dict(type='YOLOV3Neck', num_scales=3, in_channels=[1024, 512, 256], out_channels=[512, 256, 128]), - bbox_head=dict( - type='YOLOV3Head', - num_classes=num_classes, - in_channels=[512, 256, 128], - out_channels=[1024, 512, 256], - anchor_generator=dict( - type='YOLOAnchorGenerator', - base_sizes=[ - [(116, 90), (156, 198), (373, 326)], - [(30, 61), (62, 45), (59, 119)], - [(10, 13), (16, 30), (33, 23)], - ], - strides=[32, 16, 8], - ), - bbox_coder=dict(type='YOLOBBoxCoder'), - featmap_strides=[32, 16, 8], - loss_cls=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0, reduction='sum'), - loss_conf=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0, reduction='sum'), - loss_xy=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=2.0, reduction='sum'), - 
loss_wh=dict(type='MSELoss', loss_weight=2.0, reduction='sum'), - ), - # training and testing settings - train_cfg=dict(assigner=dict(type='GridAssigner', pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=0)), - test_cfg=dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - conf_thr=0.005, - nms=dict(type='nms', iou_threshold=0.45), - max_per_img=100, - ), -) - - -backend_args = None -train_pipeline = [ - dict(type='LoadImageFromFile', backend_args=backend_args), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Expand', mean=data_preprocessor['mean'], to_rgb=data_preprocessor['bgr_to_rgb'], ratio_range=(1, 2)), - dict(type='MinIoURandomCrop', min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9), min_crop_size=0.3), - dict(type='RandomResize', scale=[(320, 320), imgsz], keep_ratio=True), - dict(type='RandomFlip', prob=0.5), - dict(type='PhotoMetricDistortion'), - dict(type='PackDetInputs'), -] -test_pipeline = [ - dict(type='LoadImageFromFile', backend_args=backend_args), - dict(type='Resize', scale=imgsz, keep_ratio=True), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor')), -] - - -train_dataloader = dict( - batch_size=batch, - num_workers=workers, - persistent_workers=True, - sampler=dict(type='DefaultSampler', shuffle=True), - batch_sampler=dict(type='AspectRatioBatchSampler'), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file=train_ann, - data_prefix=dict(img=train_data), - filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline, - backend_args=backend_args, - ), -) - -val_dataloader = dict( - batch_size=val_batch, - num_workers=val_workers, - persistent_workers=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file=val_ann, - data_prefix=dict(img=val_data), - test_mode=True, - pipeline=test_pipeline, - backend_args=backend_args, - ), -) -test_dataloader = val_dataloader - -# evaluator -val_evaluator = dict(type='CocoMetric', ann_file=data_root + val_ann, metric='bbox', backend_args=backend_args) -test_evaluator = val_evaluator - - -find_unused_parameters = True - -optim_wrapper = dict( - type='OptimWrapper', - optimizer=dict(type='SGD', lr=lr, momentum=momentum, weight_decay=weight_decay), - clip_grad=dict(max_norm=35, norm_type=2), -) - -# learning policy -param_scheduler = [ - dict(type='LinearLR', start_factor=0.1, by_epoch=False, begin=0, end=2000), - dict(type='MultiStepLR', by_epoch=True, milestones=[218, 246], gamma=0.1), -] - -train_cfg = dict(by_epoch=True, max_epochs=epochs) -val_cfg = dict() -test_cfg = dict() -# learning policy -param_scheduler = [ - dict(type='LinearLR', begin=0, end=30, start_factor=0.001, by_epoch=False), # warm-up - dict(type='MultiStepLR', begin=1, end=500, milestones=[100, 200, 250], gamma=0.1, by_epoch=True), -] diff --git a/configs/yolov5/yolov5_m_1xb16_300e_coco.py b/configs/yolov5/yolov5_m_1xb16_300e_coco.py deleted file mode 100644 index 2d525181..00000000 --- a/configs/yolov5/yolov5_m_1xb16_300e_coco.py +++ /dev/null @@ -1,158 +0,0 @@ -_base_ = ['./base_arch.py'] - -# ========================Suggested optional parameters======================== -# MODEL -num_classes = 71 -deepen_factor = 0.67 -widen_factor = 0.75 - -# DATA -dataset_type = 'sscma.CustomYOLOv5CocoDataset' -train_ann = 'train/_annotations.coco.json' -train_data = 'train/' # Prefix of train image path -val_ann = 'valid/_annotations.coco.json' 
-val_data = 'valid/' # Prefix of val image path - -# dataset link: https://universe.roboflow.com/team-roboflow/coco-128 -data_root = 'https://universe.roboflow.com/ds/z5UOcgxZzD?key=bwx9LQUT0t' -height = 640 -width = 640 -batch = 16 -workers = 2 -val_batch = batch -val_workers = workers -imgsz = (width, height) - -# TRAIN -persistent_workers = True - -# ================================END================================= - -# DATA -affine_scale = 0.5 -# MODEL -strides = [8, 16, 32] - -anchors = [ - [(10, 13), (16, 30), (33, 23)], # P3/8 - [(30, 61), (62, 45), (59, 119)], # P4/16 - [(116, 90), (156, 198), (373, 326)], # P5/32 -] - -model = dict( - type='mmyolo.YOLODetector', - backbone=dict( - type='YOLOv5CSPDarknet', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - ), - neck=dict( - type='YOLOv5PAFPN', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - ), - bbox_head=dict( - head_module=dict( - num_classes=num_classes, - in_channels=[256, 512, 1024], - widen_factor=widen_factor, - ), - ), -) - -# ======================datasets================== - - -batch_shapes_cfg = dict( - type='BatchShapePolicy', - batch_size=1, - img_size=imgsz[0], - # The image scale of padding should be divided by pad_size_divisor - size_divisor=32, - # Additional paddings for pixel scale - extra_pad_ratio=0.5, -) - -albu_train_transforms = [ - dict(type='Blur', p=0.01), - dict(type='MedianBlur', p=0.01), - dict(type='ToGray', p=0.01), - dict(type='CLAHE', p=0.01), -] - -pre_transform = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), - dict(type='LoadAnnotations', with_bbox=True), -] - -train_pipeline = [ - *pre_transform, - dict(type='Mosaic', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform), - dict( - type='YOLOv5RandomAffine', - max_rotate_degree=0.0, - max_shear_degree=0.0, - scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), - # imgsz is (width, height) - border=(-imgsz[0] // 2, -imgsz[1] // 2), - border_val=(114, 114, 114), - ), - dict( - type='mmdet.Albu', - transforms=albu_train_transforms, - bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_bboxes_labels', 'gt_ignore_flags']), - keymap={'img': 'image', 'gt_bboxes': 'bboxes'}, - ), - dict(type='YOLOv5HSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict( - type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction') - ), -] - -train_dataloader = dict( - batch_size=batch, - num_workers=workers, - persistent_workers=persistent_workers, - pin_memory=True, - sampler=dict(type='DefaultSampler', shuffle=True), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file=train_ann, - data_prefix=dict(img=train_data), - filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=train_pipeline, - ), -) - -test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), - dict(type='YOLOv5KeepRatioResize', scale=imgsz), - dict(type='LetterResize', scale=imgsz, allow_scale_up=False, pad_val=dict(img=114)), - dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), - dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'pad_param'), - ), -] - -val_dataloader = dict( - batch_size=val_batch, - num_workers=val_workers, - persistent_workers=persistent_workers, - pin_memory=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - 
test_mode=True, - data_prefix=dict(img=val_data), - ann_file=val_ann, - pipeline=test_pipeline, - batch_shapes_cfg=batch_shapes_cfg, - ), -) - -test_dataloader = val_dataloader diff --git a/configs/yolov5/yolov5_n_1xb16_300e_coco.py b/configs/yolov5/yolov5_n_1xb16_300e_coco.py deleted file mode 100644 index 3df3c3aa..00000000 --- a/configs/yolov5/yolov5_n_1xb16_300e_coco.py +++ /dev/null @@ -1,159 +0,0 @@ -_base_ = ['./base_arch.py'] - -# ========================Suggested optional parameters======================== -# MODEL -num_classes = 71 -deepen_factor = 0.33 -widen_factor = 0.25 - -# DATA -dataset_type = 'sscma.CustomYOLOv5CocoDataset' -train_ann = 'train/_annotations.coco.json' -train_data = 'train/' # Prefix of train image path -val_ann = 'valid/_annotations.coco.json' -val_data = 'valid/' # Prefix of val image path - -# dataset link: https://universe.roboflow.com/team-roboflow/coco-128 -data_root = 'https://universe.roboflow.com/ds/z5UOcgxZzD?key=bwx9LQUT0t' -height = 640 -width = 640 -imgsz = (width, height) -batch = 16 -workers = 2 -val_batch = batch -val_workers = workers - - -# TRAIN -persistent_workers = True - -# ================================END================================= - -# DATA -affine_scale = 0.5 -# MODEL -strides = [8, 16, 32] - -anchors = [ - [(10, 13), (16, 30), (33, 23)], # P3/8 - [(30, 61), (62, 45), (59, 119)], # P4/16 - [(116, 90), (156, 198), (373, 326)], # P5/32 -] - -model = dict( - type='mmyolo.YOLODetector', - backbone=dict( - type='YOLOv5CSPDarknet', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - ), - neck=dict( - type='YOLOv5PAFPN', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - ), - bbox_head=dict( - head_module=dict( - num_classes=num_classes, - in_channels=[256, 512, 1024], - widen_factor=widen_factor, - ), - ), -) - -# ======================datasets================== - - -batch_shapes_cfg = dict( - type='BatchShapePolicy', - batch_size=1, - img_size=imgsz[0], - # The image scale of padding should be divided by pad_size_divisor - size_divisor=32, - # Additional paddings for pixel scale - extra_pad_ratio=0.5, -) - -albu_train_transforms = [ - dict(type='Blur', p=0.01), - dict(type='MedianBlur', p=0.01), - dict(type='ToGray', p=0.01), - dict(type='CLAHE', p=0.01), -] - -pre_transform = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), - dict(type='LoadAnnotations', with_bbox=True), -] - -train_pipeline = [ - *pre_transform, - dict(type='Mosaic', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform), - dict( - type='YOLOv5RandomAffine', - max_rotate_degree=0.0, - max_shear_degree=0.0, - scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), - # imgsz is (width, height) - border=(-imgsz[0] // 2, -imgsz[1] // 2), - border_val=(114, 114, 114), - ), - dict( - type='mmdet.Albu', - transforms=albu_train_transforms, - bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_bboxes_labels', 'gt_ignore_flags']), - keymap={'img': 'image', 'gt_bboxes': 'bboxes'}, - ), - dict(type='YOLOv5HSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict( - type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction') - ), -] - -train_dataloader = dict( - batch_size=batch, - num_workers=workers, - persistent_workers=persistent_workers, - pin_memory=True, - sampler=dict(type='DefaultSampler', shuffle=True), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file=train_ann, - data_prefix=dict(img=train_data), - 
filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=train_pipeline, - ), -) - -test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), - dict(type='YOLOv5KeepRatioResize', scale=imgsz), - dict(type='LetterResize', scale=imgsz, allow_scale_up=False, pad_val=dict(img=114)), - dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), - dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'pad_param'), - ), -] - -val_dataloader = dict( - batch_size=val_batch, - num_workers=val_workers, - persistent_workers=persistent_workers, - pin_memory=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - test_mode=True, - data_prefix=dict(img=val_data), - ann_file=val_ann, - pipeline=test_pipeline, - batch_shapes_cfg=batch_shapes_cfg, - ), -) - -test_dataloader = val_dataloader diff --git a/configs/yolov6/base.py b/configs/yolov6/base.py deleted file mode 100644 index 531cef12..00000000 --- a/configs/yolov6/base.py +++ /dev/null @@ -1,273 +0,0 @@ -_base_ = ['../_base_/default_runtime_det.py'] -default_scope = 'mmyolo' - - -# ========================Suggested optional parameters======================== -# DATA -# Types of datasets ,The type of the dataset, you can follow sscma/datasets/ to see the types we have defined, -# or you can use the types used by other mmlab libraries, -# but you need to prefix them with the appropriate prefixes to be on the safe side -dataset_type = 'sscma.CustomYOLOv5CocoDataset' -# Path to the dataset's root directory -# dataset link: https://universe.roboflow.com/team-roboflow/coco-128 -data_root = 'https://universe.roboflow.com/ds/z5UOcgxZzD?key=bwx9LQUT0t' -# Path to the annotation file for the training set, both absolute and relative paths are acceptable, -# if it is a relative path, it must be relative to "data_root". -train_ann = 'train/_annotations.coco.json' -# Path to the training set data file, both absolute and relative, if relative, it must be relative to "data_root". -train_data = 'train/' -# Path to the validation set annotation file, both absolute and relative paths are acceptable, -# if it is a relative path, it must be a relative path to data_root. -val_ann = 'valid/_annotations.coco.json' -# Path to the validation set data file, both absolute and relative paths are allowed, -# if it is a relative path, it must be a relative path to data_root. 
-val_data = 'valid/' -# Height of the model input data -height = 640 -# Width of the model input data -width = 640 -# The width and height of the model input data -imgsz = (width, height) -# persistent_workers must be False if num_workers is 0 -persistent_workers = True - -# MODEL -# Scaling factor for model depth -deepen_factor = 0.33 -# Scaling factor for model width -widen_factor = 0.5 -# Number of categories in the dataset -num_classes = 71 - -# TRAIN -# Learning rate of the model -lr = 0.01 -# Total number of rounds of model training -epochs = 300 -# Number of input data per iteration in the model training phase -batch = 64 -# Number of threads used to load data during training, this value should be adjusted accordingly to the training batch -workers = 8 -# Model weight saving interval in epochs -save_interval = 5 -# Last epoch number to switch training pipeline -num_last_epochs = 15 -# Learning rate scaling factor -lr_factor = 0.01 -# Optimizer weight decay value -weight_decay = 0.0005 -momentum=0.937 - -# VAL -# Number of input data per iteration in the model validation phase -val_batch = 1 -# Number of threads used to load data during validation, this value should be adjusted accordingly to the validation batch -val_workers = workers -# Model validation interval in epoch -val_interval = 5 -# The maximum checkpoints to keep. -max_keep_ckpts = 3 -# ================================END================================= - -# Config of batch shapes. Only on val. -# It means not used if batch_shapes_cfg is None. -batch_shapes_cfg = dict( - type='BatchShapePolicy', - batch_size=val_batch, - img_size=imgsz[0], - size_divisor=32, - extra_pad_ratio=0.5, -) - -# -----train val related----- -affine_scale = 0.5 # YOLOv5RandomAffine scaling ratio - -# Single-scale training is recommended to -# be turned on, which can speed up training. 
-env_cfg = dict(cudnn_benchmark=True) - -# ============================== Unmodified in most cases =================== -model = dict( - type='YOLODetector', - data_preprocessor=dict( - type='YOLOv5DetDataPreprocessor', mean=[0.0, 0.0, 0.0], std=[255.0, 255.0, 255.0], bgr_to_rgb=True - ), - backbone=dict( - type='YOLOv6EfficientRep', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), - act_cfg=dict(type='ReLU', inplace=True), - ), - neck=dict( - type='YOLOv6RepPAFPN', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - in_channels=[256, 512, 1024], - out_channels=[128, 256, 512], - num_csp_blocks=12, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), - act_cfg=dict(type='ReLU', inplace=True), - ), - bbox_head=dict( - type='YOLOv6Head', - head_module=dict( - type='YOLOv6HeadModule', - num_classes=num_classes, - in_channels=[128, 256, 512], - widen_factor=widen_factor, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), - act_cfg=dict(type='SiLU', inplace=True), - featmap_strides=[8, 16, 32], - ), - loss_bbox=dict( - type='IoULoss', iou_mode='giou', bbox_format='xyxy', reduction='mean', loss_weight=2.5, return_iou=False - ), - ), - train_cfg=dict( - initial_epoch=4, - initial_assigner=dict( - type='BatchATSSAssigner', num_classes=num_classes, topk=9, iou_calculator=dict(type='mmdet.BboxOverlaps2D') - ), - assigner=dict(type='BatchTaskAlignedAssigner', num_classes=num_classes, topk=13, alpha=1, beta=6), - ), - test_cfg=dict( - multi_label=True, nms_pre=30000, score_thr=0.001, nms=dict(type='nms', iou_threshold=0.65), max_per_img=300 - ), -) - -# The training pipeline of YOLOv6 is basically the same as YOLOv5. -# The difference is that Mosaic and RandomAffine will be closed in the last 15 epochs. 
# noqa -pre_transform = [dict(type='LoadImageFromFile', backend_args=None), dict(type='LoadAnnotations', with_bbox=True)] - -train_pipeline = [ - *pre_transform, - dict(type='Mosaic', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform), - dict( - type='YOLOv5RandomAffine', - max_rotate_degree=0.0, - max_translate_ratio=0.1, - scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), - # imgsz is (width, height) - border=(-imgsz[0] // 2, -imgsz[1] // 2), - border_val=(114, 114, 114), - max_shear_degree=0.0, - ), - dict(type='YOLOv5HSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict( - type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction') - ), -] - -train_pipeline_stage2 = [ - *pre_transform, - dict(type='YOLOv5KeepRatioResize', scale=imgsz), - dict(type='LetterResize', scale=imgsz, allow_scale_up=True, pad_val=dict(img=114)), - dict( - type='YOLOv5RandomAffine', - max_rotate_degree=0.0, - max_translate_ratio=0.1, - scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), - max_shear_degree=0.0, - ), - dict(type='YOLOv5HSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict( - type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction') - ), -] - -train_dataloader = dict( - batch_size=batch, - num_workers=workers, - collate_fn=dict(type='yolov5_collate'), - persistent_workers=persistent_workers, - pin_memory=True, - sampler=dict(type='DefaultSampler', shuffle=True), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file=train_ann, - data_prefix=dict(img=train_data), - filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=train_pipeline, - ), -) - -test_pipeline = [ - dict(type='LoadImageFromFile', backend_args=None), - dict(type='YOLOv5KeepRatioResize', scale=imgsz), - dict(type='LetterResize', scale=imgsz, allow_scale_up=False, pad_val=dict(img=114)), - dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), - dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'pad_param'), - ), -] - -val_dataloader = dict( - batch_size=val_batch, - num_workers=val_workers, - persistent_workers=persistent_workers, - pin_memory=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - test_mode=True, - data_prefix=dict(img=val_data), - ann_file=val_ann, - pipeline=test_pipeline, - batch_shapes_cfg=batch_shapes_cfg, - ), -) - -test_dataloader = val_dataloader - -# Optimizer and learning rate scheduler of YOLOv6 are basically the same as YOLOv5. # noqa -# The difference is that the scheduler_type of YOLOv6 is cosine. 
-optim_wrapper = dict( - type='OptimWrapper', - optimizer=dict( - type='SGD', - lr=lr, - momentum=momentum, - weight_decay=weight_decay, - nesterov=True, - batch_size_per_gpu=batch, - ), - constructor='YOLOv5OptimizerConstructor', -) - -default_hooks = dict( - param_scheduler=dict( - type='YOLOv5ParamSchedulerHook', scheduler_type='cosine', lr_factor=lr_factor, max_epochs=epochs - ), - checkpoint=dict(type='CheckpointHook', interval=save_interval, max_keep_ckpts=max_keep_ckpts, save_best='auto'), -) - -custom_hooks = [ - dict( - type='EMAHook', ema_type='ExpMomentumEMA', momentum=0.0001, update_buffers=True, strict_load=False, priority=49 - ), - dict( - type='mmdet.PipelineSwitchHook', - switch_epoch=epochs - num_last_epochs, - switch_pipeline=train_pipeline_stage2, - ), -] - -val_evaluator = dict(type='mmdet.CocoMetric', proposal_nums=(100, 1, 10), ann_file=data_root + val_ann, metric='bbox') -test_evaluator = val_evaluator - -train_cfg = dict( - type='EpochBasedTrainLoop', - max_epochs=epochs, - val_interval=val_interval, - dynamic_intervals=[(epochs - num_last_epochs, 1)], - _delete_=True, -) -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') diff --git a/configs/yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco.py b/configs/yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco.py deleted file mode 100644 index d2d66c8a..00000000 --- a/configs/yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco.py +++ /dev/null @@ -1,29 +0,0 @@ -_base_ = './yolov6_m_syncbn_fast_8xb32-300e_coco.py' - -# ========================Suggested optional parameters======================== - -# -----model related----- -# The scaling factor that controls the depth of the network structure -deepen_factor = 1 -# The scaling factor that controls the width of the network structure -widen_factor = 1 -# ================================END================================= - -# ============================== Unmodified in most cases =================== -model = dict( - backbone=dict( - deepen_factor=deepen_factor, - widen_factor=widen_factor, - hidden_ratio=1.0 / 2, - block_cfg=dict(type='ConvWrapper', norm_cfg=dict(type='BN', momentum=0.03, eps=0.001)), - act_cfg=dict(type='SiLU', inplace=True), - ), - neck=dict( - deepen_factor=deepen_factor, - widen_factor=widen_factor, - hidden_ratio=1.0 / 2, - block_cfg=dict(type='ConvWrapper', norm_cfg=dict(type='BN', momentum=0.03, eps=0.001)), - block_act_cfg=dict(type='SiLU', inplace=True), - ), - bbox_head=dict(head_module=dict(widen_factor=widen_factor)), -) diff --git a/configs/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco.py b/configs/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco.py deleted file mode 100644 index 93a3365f..00000000 --- a/configs/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco.py +++ /dev/null @@ -1,68 +0,0 @@ -_base_ = './base.py' - -# ========================Suggested optional parameters======================== -# -----model related----- -# The scaling factor that controls the depth of the network structure -deepen_factor = 0.6 -# The scaling factor that controls the width of the network structure -widen_factor = 0.75 - -# DATA -height = 640 -width = 640 -imgsz = (width, height) -# ================================END================================= - -# -----train val related----- -affine_scale = 0.9 # YOLOv5RandomAffine scaling ratio - -# ============================== Unmodified in most cases =================== - - -model = dict( - backbone=dict( - type='YOLOv6CSPBep', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - hidden_ratio=2.0 / 3, - 
block_cfg=dict(type='RepVGGBlock'), - act_cfg=dict(type='ReLU', inplace=True), - ), - neck=dict( - type='YOLOv6CSPRepPAFPN', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - block_cfg=dict(type='RepVGGBlock'), - hidden_ratio=2.0 / 3, - block_act_cfg=dict(type='ReLU', inplace=True), - ), - bbox_head=dict(type='YOLOv6Head', head_module=dict(widen_factor=widen_factor)), -) - -pre_transform = [dict(type='LoadImageFromFile', backend_args=None), dict(type='LoadAnnotations', with_bbox=True)] - -mosaic_affine_pipeline = [ - dict(type='Mosaic', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform), - dict( - type='YOLOv5RandomAffine', - max_rotate_degree=0.0, - max_shear_degree=0.0, - scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), - # imgsz is (width, height) - border=(-imgsz[0] // 2, -imgsz[1] // 2), - border_val=(114, 114, 114), - ), -] - -train_pipeline = [ - *pre_transform, - *mosaic_affine_pipeline, - dict(type='YOLOv5MixUp', prob=0.1, pre_transform=[*pre_transform, *mosaic_affine_pipeline]), - dict(type='YOLOv5HSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict( - type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction') - ), -] - -train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/yolov6/yolov6_n_syncbn_fast_8xb32-400e_coco.py b/configs/yolov6/yolov6_n_syncbn_fast_8xb32-400e_coco.py deleted file mode 100644 index ac7d5fb5..00000000 --- a/configs/yolov6/yolov6_n_syncbn_fast_8xb32-400e_coco.py +++ /dev/null @@ -1,21 +0,0 @@ -_base_ = './base.py' - -# ========================Suggested optional parameters======================== -# -----model related----- -# The scaling factor that controls the depth of the network structure -deepen_factor = 0.33 -# The scaling factor that controls the width of the network structure -widen_factor = 0.25 - -# -----train val related----- -lr_factor = 0.02 # Learning rate scaling factor -# ================================END================================= - -# ============================== Unmodified in most cases =================== -model = dict( - backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), - neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), - bbox_head=dict(head_module=dict(widen_factor=widen_factor), loss_bbox=dict(iou_mode='siou')), -) - -default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor)) diff --git a/configs/yolov6/yolov6_s_syncbn_fast_8xb32-300e_coco.py b/configs/yolov6/yolov6_s_syncbn_fast_8xb32-300e_coco.py deleted file mode 100644 index 531cef12..00000000 --- a/configs/yolov6/yolov6_s_syncbn_fast_8xb32-300e_coco.py +++ /dev/null @@ -1,273 +0,0 @@ -_base_ = ['../_base_/default_runtime_det.py'] -default_scope = 'mmyolo' - - -# ========================Suggested optional parameters======================== -# DATA -# Types of datasets ,The type of the dataset, you can follow sscma/datasets/ to see the types we have defined, -# or you can use the types used by other mmlab libraries, -# but you need to prefix them with the appropriate prefixes to be on the safe side -dataset_type = 'sscma.CustomYOLOv5CocoDataset' -# Path to the dataset's root directory -# dataset link: https://universe.roboflow.com/team-roboflow/coco-128 -data_root = 'https://universe.roboflow.com/ds/z5UOcgxZzD?key=bwx9LQUT0t' -# Path to the annotation file for the training set, both absolute and relative paths are acceptable, -# if it is a relative path, it must be relative to "data_root". 
-train_ann = 'train/_annotations.coco.json' -# Path to the training set data file, both absolute and relative, if relative, it must be relative to "data_root". -train_data = 'train/' -# Path to the validation set annotation file, both absolute and relative paths are acceptable, -# if it is a relative path, it must be a relative path to data_root. -val_ann = 'valid/_annotations.coco.json' -# Path to the validation set data file, both absolute and relative paths are allowed, -# if it is a relative path, it must be a relative path to data_root. -val_data = 'valid/' -# Height of the model input data -height = 640 -# Width of the model input data -width = 640 -# The width and height of the model input data -imgsz = (width, height) -# persistent_workers must be False if num_workers is 0 -persistent_workers = True - -# MODEL -# Scaling factor for model depth -deepen_factor = 0.33 -# Scaling factor for model width -widen_factor = 0.5 -# Number of categories in the dataset -num_classes = 71 - -# TRAIN -# Learning rate of the model -lr = 0.01 -# Total number of rounds of model training -epochs = 300 -# Number of input data per iteration in the model training phase -batch = 64 -# Number of threads used to load data during training, this value should be adjusted accordingly to the training batch -workers = 8 -# Model weight saving interval in epochs -save_interval = 5 -# Last epoch number to switch training pipeline -num_last_epochs = 15 -# Learning rate scaling factor -lr_factor = 0.01 -# Optimizer weight decay value -weight_decay = 0.0005 -momentum=0.937 - -# VAL -# Number of input data per iteration in the model validation phase -val_batch = 1 -# Number of threads used to load data during validation, this value should be adjusted accordingly to the validation batch -val_workers = workers -# Model validation interval in epoch -val_interval = 5 -# The maximum checkpoints to keep. -max_keep_ckpts = 3 -# ================================END================================= - -# Config of batch shapes. Only on val. -# It means not used if batch_shapes_cfg is None. -batch_shapes_cfg = dict( - type='BatchShapePolicy', - batch_size=val_batch, - img_size=imgsz[0], - size_divisor=32, - extra_pad_ratio=0.5, -) - -# -----train val related----- -affine_scale = 0.5 # YOLOv5RandomAffine scaling ratio - -# Single-scale training is recommended to -# be turned on, which can speed up training. 
-env_cfg = dict(cudnn_benchmark=True) - -# ============================== Unmodified in most cases =================== -model = dict( - type='YOLODetector', - data_preprocessor=dict( - type='YOLOv5DetDataPreprocessor', mean=[0.0, 0.0, 0.0], std=[255.0, 255.0, 255.0], bgr_to_rgb=True - ), - backbone=dict( - type='YOLOv6EfficientRep', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), - act_cfg=dict(type='ReLU', inplace=True), - ), - neck=dict( - type='YOLOv6RepPAFPN', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - in_channels=[256, 512, 1024], - out_channels=[128, 256, 512], - num_csp_blocks=12, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), - act_cfg=dict(type='ReLU', inplace=True), - ), - bbox_head=dict( - type='YOLOv6Head', - head_module=dict( - type='YOLOv6HeadModule', - num_classes=num_classes, - in_channels=[128, 256, 512], - widen_factor=widen_factor, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), - act_cfg=dict(type='SiLU', inplace=True), - featmap_strides=[8, 16, 32], - ), - loss_bbox=dict( - type='IoULoss', iou_mode='giou', bbox_format='xyxy', reduction='mean', loss_weight=2.5, return_iou=False - ), - ), - train_cfg=dict( - initial_epoch=4, - initial_assigner=dict( - type='BatchATSSAssigner', num_classes=num_classes, topk=9, iou_calculator=dict(type='mmdet.BboxOverlaps2D') - ), - assigner=dict(type='BatchTaskAlignedAssigner', num_classes=num_classes, topk=13, alpha=1, beta=6), - ), - test_cfg=dict( - multi_label=True, nms_pre=30000, score_thr=0.001, nms=dict(type='nms', iou_threshold=0.65), max_per_img=300 - ), -) - -# The training pipeline of YOLOv6 is basically the same as YOLOv5. -# The difference is that Mosaic and RandomAffine will be closed in the last 15 epochs. 
# noqa -pre_transform = [dict(type='LoadImageFromFile', backend_args=None), dict(type='LoadAnnotations', with_bbox=True)] - -train_pipeline = [ - *pre_transform, - dict(type='Mosaic', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform), - dict( - type='YOLOv5RandomAffine', - max_rotate_degree=0.0, - max_translate_ratio=0.1, - scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), - # imgsz is (width, height) - border=(-imgsz[0] // 2, -imgsz[1] // 2), - border_val=(114, 114, 114), - max_shear_degree=0.0, - ), - dict(type='YOLOv5HSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict( - type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction') - ), -] - -train_pipeline_stage2 = [ - *pre_transform, - dict(type='YOLOv5KeepRatioResize', scale=imgsz), - dict(type='LetterResize', scale=imgsz, allow_scale_up=True, pad_val=dict(img=114)), - dict( - type='YOLOv5RandomAffine', - max_rotate_degree=0.0, - max_translate_ratio=0.1, - scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), - max_shear_degree=0.0, - ), - dict(type='YOLOv5HSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict( - type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction') - ), -] - -train_dataloader = dict( - batch_size=batch, - num_workers=workers, - collate_fn=dict(type='yolov5_collate'), - persistent_workers=persistent_workers, - pin_memory=True, - sampler=dict(type='DefaultSampler', shuffle=True), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file=train_ann, - data_prefix=dict(img=train_data), - filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=train_pipeline, - ), -) - -test_pipeline = [ - dict(type='LoadImageFromFile', backend_args=None), - dict(type='YOLOv5KeepRatioResize', scale=imgsz), - dict(type='LetterResize', scale=imgsz, allow_scale_up=False, pad_val=dict(img=114)), - dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), - dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'pad_param'), - ), -] - -val_dataloader = dict( - batch_size=val_batch, - num_workers=val_workers, - persistent_workers=persistent_workers, - pin_memory=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - test_mode=True, - data_prefix=dict(img=val_data), - ann_file=val_ann, - pipeline=test_pipeline, - batch_shapes_cfg=batch_shapes_cfg, - ), -) - -test_dataloader = val_dataloader - -# Optimizer and learning rate scheduler of YOLOv6 are basically the same as YOLOv5. # noqa -# The difference is that the scheduler_type of YOLOv6 is cosine. 
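A rough sketch of the decay that scheduler_type='cosine' applies; the exact curve is implemented inside mmyolo's YOLOv5ParamSchedulerHook, so this approximation is only for intuition:

import math

# Approximate per-epoch LR multiplier: eases from 1.0 down to lr_factor.
def cosine_factor(epoch, max_epochs=300, lr_factor=0.01):
    return ((1 - math.cos(epoch * math.pi / max_epochs)) / 2) * (lr_factor - 1) + 1

print(cosine_factor(0))    # 1.0 at the start of training
print(cosine_factor(300))  # 0.01 == lr_factor at the end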
-optim_wrapper = dict( - type='OptimWrapper', - optimizer=dict( - type='SGD', - lr=lr, - momentum=momentum, - weight_decay=weight_decay, - nesterov=True, - batch_size_per_gpu=batch, - ), - constructor='YOLOv5OptimizerConstructor', -) - -default_hooks = dict( - param_scheduler=dict( - type='YOLOv5ParamSchedulerHook', scheduler_type='cosine', lr_factor=lr_factor, max_epochs=epochs - ), - checkpoint=dict(type='CheckpointHook', interval=save_interval, max_keep_ckpts=max_keep_ckpts, save_best='auto'), -) - -custom_hooks = [ - dict( - type='EMAHook', ema_type='ExpMomentumEMA', momentum=0.0001, update_buffers=True, strict_load=False, priority=49 - ), - dict( - type='mmdet.PipelineSwitchHook', - switch_epoch=epochs - num_last_epochs, - switch_pipeline=train_pipeline_stage2, - ), -] - -val_evaluator = dict(type='mmdet.CocoMetric', proposal_nums=(100, 1, 10), ann_file=data_root + val_ann, metric='bbox') -test_evaluator = val_evaluator - -train_cfg = dict( - type='EpochBasedTrainLoop', - max_epochs=epochs, - val_interval=val_interval, - dynamic_intervals=[(epochs - num_last_epochs, 1)], - _delete_=True, -) -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') diff --git a/configs/yolov6/yolov6_t_syncbn_fast_8xb32-300e_coco.py b/configs/yolov6/yolov6_t_syncbn_fast_8xb32-300e_coco.py deleted file mode 100644 index 8891e835..00000000 --- a/configs/yolov6/yolov6_t_syncbn_fast_8xb32-300e_coco.py +++ /dev/null @@ -1,16 +0,0 @@ -_base_ = './base.py' - -# ========================Suggested optional parameters======================== -# -----model related----- -# The scaling factor that controls the depth of the network structure -deepen_factor = 0.33 -# The scaling factor that controls the width of the network structure -widen_factor = 0.375 -# ================================END================================= - -# ============================== Unmodified in most cases =================== -model = dict( - backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), - neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), - bbox_head=dict(type='YOLOv6Head', head_module=dict(widen_factor=widen_factor), loss_bbox=dict(iou_mode='siou')), -) diff --git a/configs/yolov7/base.py b/configs/yolov7/base.py deleted file mode 100644 index dba6a170..00000000 --- a/configs/yolov7/base.py +++ /dev/null @@ -1,337 +0,0 @@ -_base_ = ['../_base_/default_runtime_det.py'] -default_scope = 'mmyolo' - -# ========================Suggested optional parameters======================== -# DATA -# Types of datasets ,The type of the dataset, you can follow sscma/datasets/ to see the types we have defined, -# or you can use the types used by other mmlab libraries, -# but you need to prefix them with the appropriate prefixes to be on the safe side -dataset_type = 'sscma.CustomYOLOv5CocoDataset' -# Path to the dataset's root directory -# dataset link: https://universe.roboflow.com/team-roboflow/coco-128 -data_root = 'https://universe.roboflow.com/ds/z5UOcgxZzD?key=bwx9LQUT0t' -# Path to the annotation file for the training set, both absolute and relative paths are acceptable, -# if it is a relative path, it must be relative to "data_root". -train_ann = 'train/_annotations.coco.json' -# Path to the training set data file, both absolute and relative, if relative, it must be relative to "data_root". -train_data = 'train/' -# Path to the validation set annotation file, both absolute and relative paths are acceptable, -# if it is a relative path, it must be a relative path to data_root. 
-val_ann = 'valid/_annotations.coco.json' -# Path to the validation set data file, both absolute and relative paths are allowed, -# if it is a relative path, it must be a relative path to data_root. -val_data = 'valid/' -# Height of the model input data -height = 640 -# Width of the model input data -width = 640 -# The width and height of the model input data -imgsz = (width, height) -# persistent_workers must be False if num_workers is 0 -persistent_workers = True - -# MODEL -# Number of categories in the dataset -num_classes = 71 - -# TRAIN -# Learning rate of the model -lr = 0.01 -# Total number of rounds of model training -epochs = 300 -# Number of input data per iteration in the model training phase -batch = 32 -# Number of threads used to load data during training, this value should be adjusted accordingly to the training batch -workers = 8 -# Model weight saving interval in epochs -save_interval = 5 -# Last epoch number to switch training pipeline -num_last_epochs = 15 -# Learning rate scaling factor -lr_factor = 0.01 -# Optimizer weight decay value -weight_decay = 0.0005 -momentum=0.937 - -# VAL -# Number of input data per iteration in the model validation phase -val_batch = 1 -# Number of threads used to load data during validation, this value should be adjusted accordingly to the validation batch -val_workers = workers -# Model validation interval in epoch -val_interval = 5 -# The maximum checkpoints to keep. -max_keep_ckpts = 1 -# ================================END================================= -# -----model related----- -# Basic size of multi-scale prior box -anchors = [ - [(12, 16), (19, 36), (40, 28)], # P3/8 - [(36, 75), (76, 55), (72, 146)], # P4/16 - [(142, 110), (192, 243), (459, 401)], # P5/32 -] - - -num_epoch_stage2 = 30 # The last 30 epochs switch evaluation interval -val_interval_stage2 = 1 # Evaluation interval -# ========================Possible modified parameters======================== - - -model_test_cfg = dict( - # The config of multi-label for multi-class prediction. - multi_label=True, - # The number of boxes before NMS. - nms_pre=30000, - score_thr=0.001, # Threshold to filter out boxes. - nms=dict(type='nms', iou_threshold=0.65), # NMS type and threshold - max_per_img=300, -) # Max number of detections of each image - - -# Config of batch shapes. Only on val. -# It means not used if batch_shapes_cfg is None. -batch_shapes_cfg = dict( - type='BatchShapePolicy', - batch_size=val_batch, - img_size=imgsz[0], - # The image scale of padding should be divided by pad_size_divisor - size_divisor=32, - # Additional paddings for pixel scale - extra_pad_ratio=0.5, -) - -# -----model related----- -strides = [8, 16, 32] # Strides of multi-scale prior box -num_det_layers = 3 # The number of model output scales -norm_cfg = dict(type='BN', momentum=0.03, eps=0.001) - -# Data augmentation -max_translate_ratio = 0.2 # YOLOv5RandomAffine -scaling_ratio_range = (0.1, 2.0) # YOLOv5RandomAffine -mixup_prob = 0.15 # YOLOv5MixUp -randchoice_mosaic_prob = [0.8, 0.2] -mixup_alpha = 8.0 # YOLOv5MixUp -mixup_beta = 8.0 # YOLOv5MixUp - -# -----train val related----- -loss_cls_weight = 0.3 -loss_bbox_weight = 0.05 -loss_obj_weight = 0.7 -# BatchYOLOv7Assigner params -simota_candidate_topk = 10 -simota_iou_weight = 3.0 -simota_cls_weight = 1.0 -prior_match_thr = 4.0 # Priori box matching threshold -obj_level_weights = [4.0, 1.0, 0.4] # The obj loss weights of the three output layers - -# Single-scale training is recommended to -# be turned on, which can speed up training. 
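The loss weights above are not used as-is; the bbox_head below rescales them by the class count and head geometry. Worked out with this config's values:

# Worked arithmetic for the loss_weight expressions in the bbox_head below.
num_classes, num_det_layers, img_w = 71, 3, 640
loss_cls = 0.3 * (num_classes / 80 * 3 / num_det_layers)    # ~0.26625
loss_bbox = 0.05 * (3 / num_det_layers)                     # 0.05
loss_obj = 0.7 * ((img_w / 640) ** 2 * 3 / num_det_layers)  # 0.7
print(loss_cls, loss_bbox, loss_obj)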
-env_cfg = dict(cudnn_benchmark=True) - -# ===============================Unmodified in most cases==================== -model = dict( - type='YOLODetector', - data_preprocessor=dict( - type='YOLOv5DetDataPreprocessor', mean=[0.0, 0.0, 0.0], std=[255.0, 255.0, 255.0], bgr_to_rgb=True - ), - backbone=dict(type='YOLOv7Backbone', arch='L', norm_cfg=norm_cfg, act_cfg=dict(type='SiLU', inplace=True)), - neck=dict( - type='YOLOv7PAFPN', - block_cfg=dict(type='ELANBlock', middle_ratio=0.5, block_ratio=0.25, num_blocks=4, num_convs_in_block=1), - upsample_feats_cat_first=False, - in_channels=[512, 1024, 1024], - # The real output channel will be multiplied by 2 - out_channels=[128, 256, 512], - norm_cfg=norm_cfg, - act_cfg=dict(type='SiLU', inplace=True), - ), - bbox_head=dict( - type='YOLOv7Head', - head_module=dict( - type='YOLOv7HeadModule', - num_classes=num_classes, - in_channels=[256, 512, 1024], - featmap_strides=strides, - num_base_priors=3, - ), - prior_generator=dict(type='mmdet.YOLOAnchorGenerator', base_sizes=anchors, strides=strides), - # scaled based on number of detection layers - loss_cls=dict( - type='mmdet.CrossEntropyLoss', - use_sigmoid=True, - reduction='mean', - loss_weight=loss_cls_weight * (num_classes / 80 * 3 / num_det_layers), - ), - loss_bbox=dict( - type='IoULoss', - iou_mode='ciou', - bbox_format='xywh', - reduction='mean', - loss_weight=loss_bbox_weight * (3 / num_det_layers), - return_iou=True, - ), - loss_obj=dict( - type='mmdet.CrossEntropyLoss', - use_sigmoid=True, - reduction='mean', - loss_weight=loss_obj_weight * ((imgsz[0] / 640) ** 2 * 3 / num_det_layers), - ), - prior_match_thr=prior_match_thr, - obj_level_weights=obj_level_weights, - # BatchYOLOv7Assigner params - simota_candidate_topk=simota_candidate_topk, - simota_iou_weight=simota_iou_weight, - simota_cls_weight=simota_cls_weight, - ), - test_cfg=model_test_cfg, -) - -pre_transform = [dict(type='LoadImageFromFile', backend_args=None), dict(type='LoadAnnotations', with_bbox=True)] - -mosiac4_pipeline = [ - dict(type='Mosaic', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform), - dict( - type='YOLOv5RandomAffine', - max_rotate_degree=0.0, - max_shear_degree=0.0, - max_translate_ratio=max_translate_ratio, # note - scaling_ratio_range=scaling_ratio_range, # note - # imgsz is (width, height) - border=(-imgsz[0] // 2, -imgsz[1] // 2), - border_val=(114, 114, 114), - ), -] - -mosiac9_pipeline = [ - dict(type='Mosaic9', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform), - dict( - type='YOLOv5RandomAffine', - max_rotate_degree=0.0, - max_shear_degree=0.0, - max_translate_ratio=max_translate_ratio, # note - scaling_ratio_range=scaling_ratio_range, # note - # imgsz is (width, height) - border=(-imgsz[0] // 2, -imgsz[1] // 2), - border_val=(114, 114, 114), - ), -] - -randchoice_mosaic_pipeline = dict( - type='RandomChoice', transforms=[mosiac4_pipeline, mosiac9_pipeline], prob=randchoice_mosaic_prob -) - -train_pipeline = [ - *pre_transform, - randchoice_mosaic_pipeline, - dict( - type='YOLOv5MixUp', - alpha=mixup_alpha, # note - beta=mixup_beta, # note - prob=mixup_prob, - pre_transform=[*pre_transform, randchoice_mosaic_pipeline], - ), - dict(type='YOLOv5HSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict( - type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction') - ), -] - -train_dataloader = dict( - batch_size=batch, - num_workers=workers, - persistent_workers=persistent_workers, - pin_memory=True, - 
sampler=dict(type='DefaultSampler', shuffle=True), - collate_fn=dict(type='yolov5_collate'), # FASTER - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file=train_ann, - data_prefix=dict(img=train_data), - filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=train_pipeline, - ), -) - -test_pipeline = [ - dict(type='LoadImageFromFile', backend_args=None), - dict(type='YOLOv5KeepRatioResize', scale=imgsz), - dict(type='LetterResize', scale=imgsz, allow_scale_up=False, pad_val=dict(img=114)), - dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), - dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'pad_param'), - ), -] - -val_dataloader = dict( - batch_size=val_batch, - num_workers=val_workers, - persistent_workers=persistent_workers, - pin_memory=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - test_mode=True, - data_prefix=dict(img=val_data), - ann_file=val_ann, - pipeline=test_pipeline, - batch_shapes_cfg=batch_shapes_cfg, - ), -) - -test_dataloader = val_dataloader - -param_scheduler = None -optim_wrapper = dict( - type='OptimWrapper', - optimizer=dict( - type='SGD', - lr=lr, - momentum=momentum, - weight_decay=weight_decay, - nesterov=True, - batch_size_per_gpu=batch, - ), - constructor='YOLOv7OptimWrapperConstructor', -) - -default_hooks = dict( - param_scheduler=dict( - type='YOLOv5ParamSchedulerHook', scheduler_type='cosine', lr_factor=lr_factor, max_epochs=epochs # note - ), - checkpoint=dict( - type='CheckpointHook', - save_param_scheduler=False, - interval=save_interval, - save_best='auto', - max_keep_ckpts=max_keep_ckpts, - ), -) - -custom_hooks = [ - dict( - type='EMAHook', ema_type='ExpMomentumEMA', momentum=0.0001, update_buffers=True, strict_load=False, priority=49 - ) -] - -val_evaluator = dict( - type='mmdet.CocoMetric', - proposal_nums=(100, 1, 10), # Can be accelerated - ann_file=data_root + val_ann, - metric='bbox', -) -test_evaluator = val_evaluator - -train_cfg = dict( - type='EpochBasedTrainLoop', - max_epochs=epochs, - val_interval=val_interval, - dynamic_intervals=[(epochs - num_epoch_stage2, val_interval_stage2)], - _delete_=True, -) -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') diff --git a/configs/yolov7/yolov7_d-p6_syncbn_fast_8x16b-300e_coco.py b/configs/yolov7/yolov7_d-p6_syncbn_fast_8x16b-300e_coco.py deleted file mode 100644 index 12585f94..00000000 --- a/configs/yolov7/yolov7_d-p6_syncbn_fast_8x16b-300e_coco.py +++ /dev/null @@ -1,19 +0,0 @@ -_base_ = './yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py' - -model = dict( - backbone=dict(arch='D'), - neck=dict( - use_maxpool_in_downsample=True, - use_in_channels_in_downsample=True, - block_cfg=dict(type='ELANBlock', middle_ratio=0.4, block_ratio=0.2, num_blocks=6, num_convs_in_block=1), - in_channels=[384, 768, 1152, 1536], - out_channels=[192, 384, 576, 768], - ), - bbox_head=dict( - head_module=dict( - in_channels=[192, 384, 576, 768], - main_out_channels=[384, 768, 1152, 1536], - aux_out_channels=[384, 768, 1152, 1536], - ) - ), -) diff --git a/configs/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco.py b/configs/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco.py deleted file mode 100644 index efbcf41d..00000000 --- a/configs/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco.py +++ /dev/null @@ -1,13 +0,0 @@ -_base_ = './yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py' - -model = dict( - backbone=dict(arch='E'), - 
neck=dict( - use_maxpool_in_downsample=True, - use_in_channels_in_downsample=True, - block_cfg=dict(type='ELANBlock', middle_ratio=0.4, block_ratio=0.2, num_blocks=6, num_convs_in_block=1), - in_channels=[320, 640, 960, 1280], - out_channels=[160, 320, 480, 640], - ), - bbox_head=dict(head_module=dict(in_channels=[160, 320, 480, 640], main_out_channels=[320, 640, 960, 1280])), -) diff --git a/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py b/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py deleted file mode 100644 index b73ea45c..00000000 --- a/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py +++ /dev/null @@ -1,337 +0,0 @@ -_base_ = ['../_base_/default_runtime_det.py'] -default_scope = 'mmyolo' - -# ========================Suggested optional parameters======================== -# DATA -# Types of datasets ,The type of the dataset, you can follow sscma/datasets/ to see the types we have defined, -# or you can use the types used by other mmlab libraries, -# but you need to prefix them with the appropriate prefixes to be on the safe side -dataset_type = 'sscma.CustomYOLOv5CocoDataset' -# Path to the dataset's root directory -# dataset link: https://universe.roboflow.com/team-roboflow/coco-128 -data_root = 'https://universe.roboflow.com/ds/z5UOcgxZzD?key=bwx9LQUT0t' -# Path to the annotation file for the training set, both absolute and relative paths are acceptable, -# if it is a relative path, it must be relative to "data_root". -train_ann = 'train/_annotations.coco.json' -# Path to the training set data file, both absolute and relative, if relative, it must be relative to "data_root". -train_data = 'train/' -# Path to the validation set annotation file, both absolute and relative paths are acceptable, -# if it is a relative path, it must be a relative path to data_root. -val_ann = 'valid/_annotations.coco.json' -# Path to the validation set data file, both absolute and relative paths are allowed, -# if it is a relative path, it must be a relative path to data_root. -val_data = 'valid/' -# Height of the model input data -height = 640 -# Width of the model input data -width = 640 -# The width and height of the model input data -imgsz = (width, height) -# persistent_workers must be False if num_workers is 0 -persistent_workers = True - -# MODEL -# Number of categories in the dataset -num_classes = 71 - -# TRAIN -# Learning rate of the model -lr = 0.01 -# Total number of rounds of model training -epochs = 300 -# Number of input data per iteration in the model training phase -batch = 64 -# Number of threads used to load data during training, this value should be adjusted accordingly to the training batch -workers = 8 -# Model weight saving interval in epochs -save_interval = 5 -# Last epoch number to switch training pipeline -num_last_epochs = 15 -# Learning rate scaling factor -lr_factor = 0.01 -# Optimizer weight decay value -weight_decay = 0.0005 -momentum = 0.937 - -# VAL -# Number of input data per iteration in the model validation phase -val_batch = batch -# Number of threads used to load data during validation, this value should be adjusted accordingly to the validation batch -val_workers = workers -# Model validation interval in epoch -val_interval = 5 -# The maximum checkpoints to keep. 
-max_keep_ckpts = 3 -# ================================END================================= -# -----model related----- -# Basic size of multi-scale prior box -anchors = [ - [(12, 16), (19, 36), (40, 28)], # P3/8 - [(36, 75), (76, 55), (72, 146)], # P4/16 - [(142, 110), (192, 243), (459, 401)], # P5/32 -] - - -num_epoch_stage2 = 30 # The last 30 epochs switch evaluation interval -val_interval_stage2 = 1 # Evaluation interval -# ========================Possible modified parameters======================== - - -model_test_cfg = dict( - # The config of multi-label for multi-class prediction. - multi_label=True, - # The number of boxes before NMS. - nms_pre=30000, - score_thr=0.001, # Threshold to filter out boxes. - nms=dict(type='nms', iou_threshold=0.65), # NMS type and threshold - max_per_img=300, -) # Max number of detections of each image - - -# Config of batch shapes. Only on val. -# It means not used if batch_shapes_cfg is None. -batch_shapes_cfg = dict( - type='BatchShapePolicy', - batch_size=val_batch, - img_size=imgsz[0], - # The image scale of padding should be divided by pad_size_divisor - size_divisor=32, - # Additional paddings for pixel scale - extra_pad_ratio=0.5, -) - -# -----model related----- -strides = [8, 16, 32] # Strides of multi-scale prior box -num_det_layers = 3 # The number of model output scales -norm_cfg = dict(type='BN', momentum=0.03, eps=0.001) - -# Data augmentation -max_translate_ratio = 0.2 # YOLOv5RandomAffine -scaling_ratio_range = (0.1, 2.0) # YOLOv5RandomAffine -mixup_prob = 0.15 # YOLOv5MixUp -randchoice_mosaic_prob = [0.8, 0.2] -mixup_alpha = 8.0 # YOLOv5MixUp -mixup_beta = 8.0 # YOLOv5MixUp - -# -----train val related----- -loss_cls_weight = 0.3 -loss_bbox_weight = 0.05 -loss_obj_weight = 0.7 -# BatchYOLOv7Assigner params -simota_candidate_topk = 10 -simota_iou_weight = 3.0 -simota_cls_weight = 1.0 -prior_match_thr = 4.0 # Priori box matching threshold -obj_level_weights = [4.0, 1.0, 0.4] # The obj loss weights of the three output layers - -# Single-scale training is recommended to -# be turned on, which can speed up training. 
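For intuition, prior_match_thr bounds the YOLOv5-style shape test that decides whether a ground-truth box can be assigned to an anchor at all; a simplified, non-vectorized sketch, not the assigner's actual code:

# Simplified sketch of the shape-match test gated by prior_match_thr.
def matches_prior(gt_wh, anchor_wh, prior_match_thr=4.0):
    rw, rh = gt_wh[0] / anchor_wh[0], gt_wh[1] / anchor_wh[1]
    return max(rw, 1 / rw, rh, 1 / rh) < prior_match_thr

print(matches_prior((40, 30), (19, 36)))   # True: all side ratios stay under 4
print(matches_prior((100, 20), (12, 16)))  # False: width ratio 100/12 > 4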
-env_cfg = dict(cudnn_benchmark=True) - -# ===============================Unmodified in most cases==================== -model = dict( - type='YOLODetector', - data_preprocessor=dict( - type='YOLOv5DetDataPreprocessor', mean=[0.0, 0.0, 0.0], std=[255.0, 255.0, 255.0], bgr_to_rgb=True - ), - backbone=dict(type='YOLOv7Backbone', arch='L', norm_cfg=norm_cfg, act_cfg=dict(type='SiLU', inplace=True)), - neck=dict( - type='YOLOv7PAFPN', - block_cfg=dict(type='ELANBlock', middle_ratio=0.5, block_ratio=0.25, num_blocks=4, num_convs_in_block=1), - upsample_feats_cat_first=False, - in_channels=[512, 1024, 1024], - # The real output channel will be multiplied by 2 - out_channels=[128, 256, 512], - norm_cfg=norm_cfg, - act_cfg=dict(type='SiLU', inplace=True), - ), - bbox_head=dict( - type='YOLOv7Head', - head_module=dict( - type='YOLOv7HeadModule', - num_classes=num_classes, - in_channels=[256, 512, 1024], - featmap_strides=strides, - num_base_priors=3, - ), - prior_generator=dict(type='mmdet.YOLOAnchorGenerator', base_sizes=anchors, strides=strides), - # scaled based on number of detection layers - loss_cls=dict( - type='mmdet.CrossEntropyLoss', - use_sigmoid=True, - reduction='mean', - loss_weight=loss_cls_weight * (num_classes / 80 * 3 / num_det_layers), - ), - loss_bbox=dict( - type='IoULoss', - iou_mode='ciou', - bbox_format='xywh', - reduction='mean', - loss_weight=loss_bbox_weight * (3 / num_det_layers), - return_iou=True, - ), - loss_obj=dict( - type='mmdet.CrossEntropyLoss', - use_sigmoid=True, - reduction='mean', - loss_weight=loss_obj_weight * ((imgsz[0] / 640) ** 2 * 3 / num_det_layers), - ), - prior_match_thr=prior_match_thr, - obj_level_weights=obj_level_weights, - # BatchYOLOv7Assigner params - simota_candidate_topk=simota_candidate_topk, - simota_iou_weight=simota_iou_weight, - simota_cls_weight=simota_cls_weight, - ), - test_cfg=model_test_cfg, -) - -pre_transform = [dict(type='LoadImageFromFile', backend_args=None), dict(type='LoadAnnotations', with_bbox=True)] - -mosiac4_pipeline = [ - dict(type='Mosaic', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform), - dict( - type='YOLOv5RandomAffine', - max_rotate_degree=0.0, - max_shear_degree=0.0, - max_translate_ratio=max_translate_ratio, # note - scaling_ratio_range=scaling_ratio_range, # note - # imgsz is (width, height) - border=(-imgsz[0] // 2, -imgsz[1] // 2), - border_val=(114, 114, 114), - ), -] - -mosiac9_pipeline = [ - dict(type='Mosaic9', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform), - dict( - type='YOLOv5RandomAffine', - max_rotate_degree=0.0, - max_shear_degree=0.0, - max_translate_ratio=max_translate_ratio, # note - scaling_ratio_range=scaling_ratio_range, # note - # imgsz is (width, height) - border=(-imgsz[0] // 2, -imgsz[1] // 2), - border_val=(114, 114, 114), - ), -] - -randchoice_mosaic_pipeline = dict( - type='RandomChoice', transforms=[mosiac4_pipeline, mosiac9_pipeline], prob=randchoice_mosaic_prob -) - -train_pipeline = [ - *pre_transform, - randchoice_mosaic_pipeline, - dict( - type='YOLOv5MixUp', - alpha=mixup_alpha, # note - beta=mixup_beta, # note - prob=mixup_prob, - pre_transform=[*pre_transform, randchoice_mosaic_pipeline], - ), - dict(type='YOLOv5HSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict( - type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction') - ), -] - -train_dataloader = dict( - batch_size=batch, - num_workers=workers, - persistent_workers=persistent_workers, - pin_memory=True, - 
sampler=dict(type='DefaultSampler', shuffle=True), - collate_fn=dict(type='yolov5_collate'), # FASTER - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file=train_ann, - data_prefix=dict(img=train_data), - filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=train_pipeline, - ), -) - -test_pipeline = [ - dict(type='LoadImageFromFile', backend_args=None), - dict(type='YOLOv5KeepRatioResize', scale=imgsz), - dict(type='LetterResize', scale=imgsz, allow_scale_up=False, pad_val=dict(img=114)), - dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), - dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'pad_param'), - ), -] - -val_dataloader = dict( - batch_size=val_batch, - num_workers=val_workers, - persistent_workers=persistent_workers, - pin_memory=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - test_mode=True, - data_prefix=dict(img=val_data), - ann_file=val_ann, - pipeline=test_pipeline, - batch_shapes_cfg=batch_shapes_cfg, - ), -) - -test_dataloader = val_dataloader - -param_scheduler = None -optim_wrapper = dict( - type='OptimWrapper', - optimizer=dict( - type='SGD', - lr=lr, - momentum=momentum, - weight_decay=weight_decay, - nesterov=True, - batch_size_per_gpu=batch, - ), - constructor='YOLOv7OptimWrapperConstructor', -) - -default_hooks = dict( - param_scheduler=dict( - type='YOLOv5ParamSchedulerHook', scheduler_type='cosine', lr_factor=lr_factor, max_epochs=epochs # note - ), - checkpoint=dict( - type='CheckpointHook', - save_param_scheduler=False, - interval=save_interval, - save_best='auto', - max_keep_ckpts=max_keep_ckpts, - ), -) - -custom_hooks = [ - dict( - type='EMAHook', ema_type='ExpMomentumEMA', momentum=0.0001, update_buffers=True, strict_load=False, priority=49 - ) -] - -val_evaluator = dict( - type='mmdet.CocoMetric', - proposal_nums=(100, 1, 10), # Can be accelerated - ann_file=data_root + val_ann, - metric='bbox', -) -test_evaluator = val_evaluator - -train_cfg = dict( - type='EpochBasedTrainLoop', - max_epochs=epochs, - val_interval=val_interval, - dynamic_intervals=[(epochs - num_epoch_stage2, val_interval_stage2)], - _delete_=True, -) -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') diff --git a/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py b/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py deleted file mode 100644 index 355e65cd..00000000 --- a/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py +++ /dev/null @@ -1,97 +0,0 @@ -_base_ = './base.py' - -# ========================Suggested optional parameters======================== -# DATA -height = 640 -width = 640 -imgsz = (width, height) # width, height -num_classes = 71 - -# TRAIN -lr_factor = 0.01 # Learning rate scaling factor -# ================================END================================= -# -----model related----- -# Data augmentation -max_translate_ratio = 0.1 # YOLOv5RandomAffine -scaling_ratio_range = (0.5, 1.6) # YOLOv5RandomAffine -mixup_prob = 0.05 # YOLOv5MixUp -randchoice_mosaic_prob = [0.8, 0.2] -mixup_alpha = 8.0 # YOLOv5MixUp -mixup_beta = 8.0 # YOLOv5MixUp - -# -----train val related----- -loss_cls_weight = 0.5 -loss_obj_weight = 1.0 -num_det_layers = 3 - -# ===============================Unmodified in most cases==================== - -model = dict( - backbone=dict(arch='Tiny', act_cfg=dict(type='LeakyReLU', negative_slope=0.1)), - neck=dict( - 
is_tiny_version=True,
-        in_channels=[128, 256, 512],
-        out_channels=[64, 128, 256],
-        block_cfg=dict(_delete_=True, type='TinyDownSampleBlock', middle_ratio=0.25),
-        act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
-        use_repconv_outs=False,
-    ),
-    bbox_head=dict(
-        head_module=dict(in_channels=[128, 256, 512]),
-        loss_cls=dict(loss_weight=loss_cls_weight * (num_classes / 80 * 3 / num_det_layers)),
-        loss_obj=dict(loss_weight=loss_obj_weight * ((imgsz[0] / 640) ** 2 * 3 / num_det_layers)),
-    ),
-)
-
-pre_transform = [dict(type='LoadImageFromFile', backend_args=None), dict(type='LoadAnnotations', with_bbox=True)]
-
-mosiac4_pipeline = [
-    dict(type='Mosaic', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform),
-    dict(
-        type='YOLOv5RandomAffine',
-        max_rotate_degree=0.0,
-        max_shear_degree=0.0,
-        max_translate_ratio=max_translate_ratio,  # change
-        scaling_ratio_range=scaling_ratio_range,  # change
-        # imgsz is (width, height)
-        border=(-imgsz[0] // 2, -imgsz[1] // 2),
-        border_val=(114, 114, 114),
-    ),
-]
-
-mosiac9_pipeline = [
-    dict(type='Mosaic9', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform),
-    dict(
-        type='YOLOv5RandomAffine',
-        max_rotate_degree=0.0,
-        max_shear_degree=0.0,
-        max_translate_ratio=max_translate_ratio,  # change
-        scaling_ratio_range=scaling_ratio_range,  # change
-        border=(-imgsz[0] // 2, -imgsz[1] // 2),
-        border_val=(114, 114, 114),
-    ),
-]
-
-randchoice_mosaic_pipeline = dict(
-    type='RandomChoice', transforms=[mosiac4_pipeline, mosiac9_pipeline], prob=randchoice_mosaic_prob
-)
-
-train_pipeline = [
-    *pre_transform,
-    randchoice_mosaic_pipeline,
-    dict(
-        type='YOLOv5MixUp',
-        alpha=mixup_alpha,
-        beta=mixup_beta,
-        prob=mixup_prob,  # change
-        pre_transform=[*pre_transform, randchoice_mosaic_pipeline],
-    ),
-    dict(type='YOLOv5HSVRandomAug'),
-    dict(type='mmdet.RandomFlip', prob=0.5),
-    dict(
-        type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction')
-    ),
-]
-
-train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
-default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor))
diff --git a/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py b/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py
deleted file mode 100644
index e93fe802..00000000
--- a/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py
+++ /dev/null
@@ -1,182 +0,0 @@
-_base_ = './base.py'
-
-# ========================Suggested optional parameters========================
-# DATA
-height = 1280
-width = 1280
-imgsz = (width, height)  # width, height
-num_classes = 71  # Number of classes for classification
-
-# TRAIN
-# The only difference between P6 and P5 in terms of
-# hyperparameters is lr_factor
-lr_factor = 0.2
-# ================================END=================================
-
-# Config of batch shapes. Only used on val.
-# Not used if batch_shapes_cfg is None.
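For context before the override below: BatchShapePolicy buckets validation images by aspect ratio and pads each bucket to one shared shape whose sides are multiples of size_divisor, which cuts padding compared with a fixed square input. A simplified sketch of the rounding, assuming the aspect ratio is given as w/h; mmyolo's real implementation differs in detail:

import math

# Simplified sketch of the per-batch shape rounding behind BatchShapePolicy.
def batch_shape(aspect_ratio, img_size=1280, size_divisor=64):
    if aspect_ratio < 1:  # taller than wide
        h, w = img_size, img_size * aspect_ratio
    else:
        h, w = img_size / aspect_ratio, img_size
    return (math.ceil(h / size_divisor) * size_divisor,
            math.ceil(w / size_divisor) * size_divisor)

print(batch_shape(3 / 4))  # (1280, 960): less padding than (1280, 1280)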
-batch_shapes_cfg = dict(
-    # The padded image scale should be divisible by size_divisor
-    img_size=imgsz[0],
-    size_divisor=64,
-)
-tta_img_scales = [(1280, 1280), (1024, 1024), (1536, 1536)]
-
-# -----model related-----
-# Basic size of multi-scale prior box
-anchors = [
-    [(19, 27), (44, 40), (38, 94)],  # P3/8
-    [(96, 68), (86, 152), (180, 137)],  # P4/16
-    [(140, 301), (303, 264), (238, 542)],  # P5/32
-    [(436, 615), (739, 380), (925, 792)],  # P6/64
-]
-strides = [8, 16, 32, 64]  # Strides of multi-scale prior box
-num_det_layers = 4  # The number of model output scales
-norm_cfg = dict(type='BN', momentum=0.03, eps=0.001)
-
-# Data augmentation
-max_translate_ratio = 0.2  # YOLOv5RandomAffine
-scaling_ratio_range = (0.1, 2.0)  # YOLOv5RandomAffine
-mixup_prob = 0.15  # YOLOv5MixUp
-randchoice_mosaic_prob = [0.8, 0.2]
-mixup_alpha = 8.0  # YOLOv5MixUp
-mixup_beta = 8.0  # YOLOv5MixUp
-
-# -----train val related-----
-loss_cls_weight = 0.3
-loss_bbox_weight = 0.05
-loss_obj_weight = 0.7
-obj_level_weights = [4.0, 1.0, 0.25, 0.06]
-simota_candidate_topk = 20
-
-# ===============================Unmodified in most cases====================
-pre_transform = [dict(type='LoadImageFromFile', backend_args=None), dict(type='LoadAnnotations', with_bbox=True)]
-
-
-model = dict(
-    backbone=dict(arch='W', out_indices=(2, 3, 4, 5)),
-    neck=dict(
-        in_channels=[256, 512, 768, 1024],
-        out_channels=[128, 256, 384, 512],
-        use_maxpool_in_downsample=False,
-        use_repconv_outs=False,
-    ),
-    bbox_head=dict(
-        head_module=dict(
-            type='YOLOv7p6HeadModule',
-            in_channels=[128, 256, 384, 512],
-            featmap_strides=strides,
-            norm_cfg=norm_cfg,
-            act_cfg=dict(type='SiLU', inplace=True),
-        ),
-        prior_generator=dict(base_sizes=anchors, strides=strides),
-        simota_candidate_topk=simota_candidate_topk,  # note
-        # scaled based on number of detection layers
-        loss_cls=dict(loss_weight=loss_cls_weight * (num_classes / 80 * 3 / num_det_layers)),
-        loss_bbox=dict(loss_weight=loss_bbox_weight * (3 / num_det_layers)),
-        loss_obj=dict(loss_weight=loss_obj_weight * ((imgsz[0] / 640) ** 2 * 3 / num_det_layers)),
-        obj_level_weights=obj_level_weights,
-    ),
-)
-
-mosiac4_pipeline = [
-    dict(type='Mosaic', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform),
-    dict(
-        type='YOLOv5RandomAffine',
-        max_rotate_degree=0.0,
-        max_shear_degree=0.0,
-        max_translate_ratio=max_translate_ratio,  # note
-        scaling_ratio_range=scaling_ratio_range,  # note
-        # imgsz is (width, height)
-        border=(-imgsz[0] // 2, -imgsz[1] // 2),
-        border_val=(114, 114, 114),
-    ),
-]
-
-mosiac9_pipeline = [
-    dict(type='Mosaic9', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform),
-    dict(
-        type='YOLOv5RandomAffine',
-        max_rotate_degree=0.0,
-        max_shear_degree=0.0,
-        max_translate_ratio=max_translate_ratio,  # note
-        scaling_ratio_range=scaling_ratio_range,  # note
-        # imgsz is (width, height)
-        border=(-imgsz[0] // 2, -imgsz[1] // 2),
-        border_val=(114, 114, 114),
-    ),
-]
-
-randchoice_mosaic_pipeline = dict(
-    type='RandomChoice', transforms=[mosiac4_pipeline, mosiac9_pipeline], prob=randchoice_mosaic_prob
-)
-
-train_pipeline = [
-    *pre_transform,
-    randchoice_mosaic_pipeline,
-    dict(
-        type='YOLOv5MixUp',
-        alpha=mixup_alpha,  # note
-        beta=mixup_beta,  # note
-        prob=mixup_prob,
-        pre_transform=[*pre_transform, randchoice_mosaic_pipeline],
-    ),
-    dict(type='YOLOv5HSVRandomAug'),
-    dict(type='mmdet.RandomFlip', prob=0.5),
-    dict(
-        type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'flip', 'flip_direction') - ), -] -train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) - -test_pipeline = [ - dict(type='LoadImageFromFile', backend_args=None), - dict(type='YOLOv5KeepRatioResize', scale=imgsz), - dict(type='LetterResize', scale=imgsz, allow_scale_up=False, pad_val=dict(img=114)), - dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), - dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'pad_param'), - ), -] -val_dataloader = dict(dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=batch_shapes_cfg)) -test_dataloader = val_dataloader - -default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor)) - -# Config for Test Time Augmentation. (TTA) -_multiscale_resize_transforms = [ - dict( - type='Compose', - transforms=[ - dict(type='YOLOv5KeepRatioResize', scale=s), - dict(type='LetterResize', scale=s, allow_scale_up=False, pad_val=dict(img=114)), - ], - ) - for s in tta_img_scales -] - -tta_pipeline = [ - dict(type='LoadImageFromFile', backend_args=None), - dict( - type='TestTimeAug', - transforms=[ - _multiscale_resize_transforms, - [dict(type='mmdet.RandomFlip', prob=1.0), dict(type='mmdet.RandomFlip', prob=0.0)], - [dict(type='mmdet.LoadAnnotations', with_bbox=True)], - [ - dict( - type='mmdet.PackDetInputs', - meta_keys=( - 'img_id', - 'img_path', - 'ori_shape', - 'img_shape', - 'scale_factor', - 'pad_param', - 'flip', - 'flip_direction', - ), - ) - ], - ], - ), -] diff --git a/configs/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco.py b/configs/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco.py deleted file mode 100644 index ffe368aa..00000000 --- a/configs/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco.py +++ /dev/null @@ -1,12 +0,0 @@ -_base_ = './base.py' - -model = dict( - backbone=dict(arch='X'), - neck=dict( - in_channels=[640, 1280, 1280], - out_channels=[160, 320, 640], - block_cfg=dict(type='ELANBlock', middle_ratio=0.4, block_ratio=0.4, num_blocks=3, num_convs_in_block=2), - use_repconv_outs=False, - ), - bbox_head=dict(head_module=dict(in_channels=[320, 640, 1280])), -) diff --git a/configs/yolox/base_arch.py b/configs/yolox/base_arch.py deleted file mode 100644 index b7949d7a..00000000 --- a/configs/yolox/base_arch.py +++ /dev/null @@ -1,351 +0,0 @@ -_base_ = [ - '../_base_/default_runtime_det.py', -] - -default_scope = 'mmyolo' -# ========================Suggested optional parameters======================== -# MODEL -# The scaling factor that controls the depth of the network structure -deepen_factor = 0.33 -# The scaling factor that controls the width of the network structure -widen_factor = 0.5 -# Number of classes for classification -num_classes = 71 - -# DATA -# Dataset type, this will be used to define the dataset -dataset_type = 'sscma.CustomYOLOv5CocoDataset' -# Root path of data -# dataset link: https://universe.roboflow.com/team-roboflow/coco-128 -data_root = 'https://universe.roboflow.com/ds/z5UOcgxZzD?key=bwx9LQUT0t' -# Path of train annotation file -train_ann = 'train/_annotations.coco.json' -# Prefix of train image path -train_data = 'train/' -# Path of val annotation file -val_ann = 'valid/_annotations.coco.json' -# Prefix of val image path -val_data = 'valid/' - -height = 640 -width = 640 -imgsz = (width, height) # width, height - -# TRAIN -# Base learning rate for optim_wrapper. 
Corresponding to 8xb16=128 bs -lr = 0.001 -# Maximum training epochs -epochs = 300 -# batch_size -batch = 32 -# workers -workers = 4 -# Batch size of a single GPU during validation -val_batch = 1 -# Worker to pre-fetch data for each single GPU during validation -val_workers = 1 -persistent_workers = True -# Learning rate scaling factor -lr_factor = 0.01 - -weight_decay = 0.0005 -momentum = 0.937 - -val_interval=5 -# Save model checkpoint and validation intervals -save_interval = val_interval -# The maximum checkpoints to keep. -max_keep_ckpts = 3 - - -# ================================END================================= - - -# -----model related----- -# Basic size of multi-scale prior box -anchors = [ - [(10, 13), (16, 30), (33, 23)], # P3/8 - [(30, 61), (62, 45), (59, 119)], # P4/16 - [(116, 90), (156, 198), (373, 326)], # P5/32 -] - -# -----train val related----- -model_test_cfg = dict( - # The config of multi-label for multi-class prediction. - multi_label=True, - # The number of boxes before NMS - nms_pre=30000, - score_thr=0.001, # Threshold to filter out boxes. - nms=dict(type='nms', iou_threshold=0.65), # NMS type and threshold - max_per_img=300, -) # Max number of detections of each image - - -# Config of batch shapes. Only on val. -# It means not used if batch_shapes_cfg is None. -batch_shapes_cfg = dict( - type='BatchShapePolicy', - batch_size=val_batch, - img_size=imgsz[0], - # The image scale of padding should be divided by pad_size_divisor - size_divisor=32, - # Additional paddings for pixel scale - extra_pad_ratio=0.5, -) - -# -----model related----- -# Strides of multi-scale prior box -strides = [8, 16, 32] -num_det_layers = 3 # The number of model output scales -norm_cfg = dict(type='BN', momentum=0.03, eps=0.001) # Normalization config - -# -----train val related----- -affine_scale = 0.5 # YOLOv5RandomAffine scaling ratio -loss_cls_weight = 1.0 -loss_bbox_weight = 5.0 -loss_obj_weight = 1.0 -loss_bbox_aux_weight = 1.0 -center_radius = 2.5 # SimOTAAssigner -prior_match_thr = 4.0 # Priori box matching threshold -# The obj loss weights of the three output layers -obj_level_weights = [4.0, 1.0, 0.4] - -# Single-scale training is recommended to -# be turned on, which can speed up training. 
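center_radius above controls SimOTA's center prior: a prior is kept as a candidate only if it sits inside the GT box or within center_radius strides of the GT center. A simplified sketch of the distance part of that test; the real mmdet.SimOTAAssigner is batched and vectorized:

# Simplified sketch of the center-prior test controlled by center_radius.
def in_center_region(prior_cx, prior_cy, gt_cx, gt_cy, stride, center_radius=2.5):
    r = center_radius * stride
    return abs(prior_cx - gt_cx) < r and abs(prior_cy - gt_cy) < r

print(in_center_region(96, 104, 100, 100, stride=8))   # True: within 20 px
print(in_center_region(160, 100, 100, 100, stride=8))  # False: 60 px away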
-env_cfg = dict(cudnn_benchmark=True) - -# model arch -model = dict( - type='mmyolo.YOLODetector', - init_cfg=dict( - type='Kaiming', - layer='Conv2d', - a=2.23606797749979, # math.sqrt(5) - distribution='uniform', - mode='fan_in', - nonlinearity='leaky_relu', - ), - data_preprocessor=dict( - type='mmdet.DetDataPreprocessor', - mean=[0.0, 0.0, 0.0], - std=[255.0, 255.0, 255.0], - bgr_to_rgb=True, - ), - backbone=dict( - type='YOLOXCSPDarknet', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - out_indices=(2, 3, 4), - spp_kernal_sizes=(5, 9, 13), - norm_cfg=norm_cfg, - act_cfg=dict(type='ReLU', inplace=True), - ), - neck=dict( - type='YOLOXPAFPN', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - in_channels=[256, 512, 1024], - out_channels=256, - norm_cfg=norm_cfg, - act_cfg=dict(type='ReLU', inplace=True), - ), - bbox_head=dict( - type='YOLOXHead', - head_module=dict( - type='YOLOXHeadModule', - num_classes=num_classes, - in_channels=256, - feat_channels=256, - widen_factor=widen_factor, - stacked_convs=2, - featmap_strides=(8, 16, 32), - use_depthwise=False, - norm_cfg=norm_cfg, - act_cfg=dict(type='ReLU', inplace=True), - ), - loss_cls=dict( - type='mmdet.CrossEntropyLoss', - use_sigmoid=True, - reduction='sum', - loss_weight=loss_cls_weight, - ), - loss_bbox=dict( - type='mmdet.IoULoss', - mode='square', - eps=1e-16, - reduction='sum', - loss_weight=loss_bbox_weight, - ), - loss_obj=dict( - type='mmdet.CrossEntropyLoss', - use_sigmoid=True, - reduction='sum', - loss_weight=loss_obj_weight, - ), - loss_bbox_aux=dict(type='mmdet.L1Loss', reduction='sum', loss_weight=loss_bbox_aux_weight), - ), - train_cfg=dict( - assigner=dict( - type='mmdet.SimOTAAssigner', - center_radius=center_radius, - iou_calculator=dict(type='mmdet.BboxOverlaps2D'), - ) - ), - test_cfg=model_test_cfg, -) - -albu_train_transforms = [ - dict(type='Blur', p=0.01), - dict(type='MedianBlur', p=0.01), - dict(type='ToGray', p=0.01), - dict(type='CLAHE', p=0.01), -] - -pre_transform = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), - dict(type='LoadAnnotations', with_bbox=True), -] - -train_pipeline = [ - *pre_transform, - dict(type='Mosaic', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform), - dict( - type='YOLOv5RandomAffine', - max_rotate_degree=0.0, - max_shear_degree=0.0, - scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), - # imgsz is (width, height) - border=(-imgsz[0] // 2, -imgsz[1] // 2), - border_val=(114, 114, 114), - ), - dict( - type='mmdet.Albu', - transforms=albu_train_transforms, - bbox_params=dict( - type='BboxParams', - format='pascal_voc', - label_fields=['gt_bboxes_labels', 'gt_ignore_flags'], - ), - keymap={'img': 'image', 'gt_bboxes': 'bboxes'}, - ), - dict(type='YOLOv5HSVRandomAug'), - # dict(type='mmdet.RandomFlip', prob=0.5), - dict( - type='mmdet.PackDetInputs', - meta_keys=( - 'img_id', - 'img_path', - 'ori_shape', - 'img_shape', - ), - ), -] - -train_dataloader = dict( - batch_size=batch, - num_workers=workers, - persistent_workers=persistent_workers, - pin_memory=True, - sampler=dict(type='DefaultSampler', shuffle=True), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file=train_ann, - data_prefix=dict(img=train_data), - filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=train_pipeline, - ), -) - -test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), - dict(type='YOLOv5KeepRatioResize', scale=imgsz), - dict( - type='LetterResize', - scale=imgsz, - 
allow_scale_up=False, - pad_val=dict(img=114), - ), - dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), - dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor'), - ), -] - -val_dataloader = dict( - batch_size=val_batch, - num_workers=val_workers, - persistent_workers=persistent_workers, - pin_memory=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - test_mode=True, - data_prefix=dict(img=val_data), - ann_file=val_ann, - pipeline=test_pipeline, - batch_shapes_cfg=batch_shapes_cfg, - ), -) - -test_dataloader = val_dataloader - -param_scheduler = None -optim_wrapper = dict( - type='OptimWrapper', - optimizer=dict( - type='SGD', - lr=lr, - momentum=momentum, - weight_decay=weight_decay, - nesterov=True, - batch_size_per_gpu=batch, - ), - constructor='YOLOv5OptimizerConstructor', -) - -default_hooks = dict( - param_scheduler=dict( - type='YOLOv5ParamSchedulerHook', - scheduler_type='linear', - lr_factor=lr_factor, - max_epochs=epochs, - ), - checkpoint=dict( - type='CheckpointHook', - interval=save_interval, - save_best='auto', - max_keep_ckpts=max_keep_ckpts, - ), -) - -custom_hooks = [ - dict( - type='EMAHook', - ema_type='ExpMomentumEMA', - momentum=0.0001, - update_buffers=True, - strict_load=False, - priority=49, - ) -] - -val_evaluator = dict( - type='mmdet.CocoMetric', - proposal_nums=(100, 1, 10), - ann_file=data_root + val_ann, - metric='bbox', -) -test_evaluator = val_evaluator - -train_cfg = dict( - type='EpochBasedTrainLoop', - max_epochs=epochs, - val_interval=val_interval, - _delete_=True, -) -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') diff --git a/configs/yolox/yolox_tiny_1xb16_300e_coco.py b/configs/yolox/yolox_tiny_1xb16_300e_coco.py deleted file mode 100644 index e86a85ee..00000000 --- a/configs/yolox/yolox_tiny_1xb16_300e_coco.py +++ /dev/null @@ -1,349 +0,0 @@ -_base_ = [ - '../_base_/default_runtime_det.py', -] - -default_scope = 'mmyolo' -# ========================Suggested optional parameters======================== -# MODEL -# The scaling factor that controls the depth of the network structure -deepen_factor = 0.33 -# The scaling factor that controls the width of the network structure -widen_factor = 0.125 -# Number of classes for classification -num_classes = 71 - -# DATA -# Dataset type, this will be used to define the dataset -dataset_type = 'sscma.CustomYOLOv5CocoDataset' -# Root path of data -# dataset link: https://universe.roboflow.com/team-roboflow/coco-128 -data_root = 'https://universe.roboflow.com/ds/z5UOcgxZzD?key=bwx9LQUT0t' -# Path of train annotation file -train_ann = 'train/_annotations.coco.json' -# Prefix of train image path -train_data = 'train/' -# Path of val annotation file -val_ann = 'valid/_annotations.coco.json' -# Prefix of val image path -val_data = 'valid/' - -height = 640 -width = 640 -imgsz = (width, height) # width, height - -# TRAIN -# Base learning rate for optim_wrapper. 
Corresponding to 8xb16=128 bs -lr = 0.001 -# Maximum training epochs -epochs = 300 -# batch_size -batch = 32 -# workers -workers = 4 -# Batch size of a single GPU during validation -val_batch = 1 -# Worker to pre-fetch data for each single GPU during validation -val_workers = 1 -persistent_workers = True -# Learning rate scaling factor -lr_factor = 0.01 - -weight_decay = 0.0005 -momentum = 0.937 -val_interval = 5 -# Save model checkpoint and validation intervals -save_interval = val_interval -# The maximum checkpoints to keep. -max_keep_ckpts = 3 - -# ================================END================================= - - -# -----model related----- -# Basic size of multi-scale prior box -anchors = [ - [(10, 13), (16, 30), (33, 23)], # P3/8 - [(30, 61), (62, 45), (59, 119)], # P4/16 - [(116, 90), (156, 198), (373, 326)], # P5/32 -] - -# -----train val related----- -model_test_cfg = dict( - # The config of multi-label for multi-class prediction. - multi_label=True, - # The number of boxes before NMS - nms_pre=30000, - score_thr=0.001, # Threshold to filter out boxes. - nms=dict(type='nms', iou_threshold=0.65), # NMS type and threshold - max_per_img=300, -) # Max number of detections of each image - - -# Config of batch shapes. Only on val. -# It means not used if batch_shapes_cfg is None. -batch_shapes_cfg = dict( - type='BatchShapePolicy', - batch_size=val_batch, - img_size=imgsz[0], - # The image scale of padding should be divided by pad_size_divisor - size_divisor=32, - # Additional paddings for pixel scale - extra_pad_ratio=0.5, -) - -# -----model related----- -# Strides of multi-scale prior box -strides = [8, 16, 32] -num_det_layers = 3 # The number of model output scales -norm_cfg = dict(type='BN', momentum=0.03, eps=0.001) # Normalization config - -# -----train val related----- -affine_scale = 0.5 # YOLOv5RandomAffine scaling ratio -loss_cls_weight = 1.0 -loss_bbox_weight = 5.0 -loss_obj_weight = 1.0 -loss_bbox_aux_weight = 1.0 -center_radius = 2.5 # SimOTAAssigner -prior_match_thr = 4.0 # Priori box matching threshold -# The obj loss weights of the three output layers -obj_level_weights = [4.0, 1.0, 0.4] - -# Single-scale training is recommended to -# be turned on, which can speed up training. 
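The tiny variant differs from the base arch mainly through widen_factor = 0.125; every channel count in the backbone, neck, and head is scaled by it. A rough sketch of the effect; exact rounding and divisibility handling live inside mmyolo:

# Rough sketch: how widen_factor shrinks channel counts model-wide.
def scaled_channels(base_channels, widen_factor=0.125):
    return int(base_channels * widen_factor)

for base in (256, 512, 1024):
    print(base, '->', scaled_channels(base))  # 32, 64, 128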
-env_cfg = dict(cudnn_benchmark=True) - -# model arch -model = dict( - type='mmyolo.YOLODetector', - init_cfg=dict( - type='Kaiming', - layer='Conv2d', - a=2.23606797749979, # math.sqrt(5) - distribution='uniform', - mode='fan_in', - nonlinearity='leaky_relu', - ), - data_preprocessor=dict( - type='mmdet.DetDataPreprocessor', - mean=[0.0, 0.0, 0.0], - std=[255.0, 255.0, 255.0], - bgr_to_rgb=True, - ), - backbone=dict( - type='YOLOXCSPDarknet', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - out_indices=(2, 3, 4), - spp_kernal_sizes=(5, 9, 13), - norm_cfg=norm_cfg, - act_cfg=dict(type='ReLU', inplace=True), - ), - neck=dict( - type='YOLOXPAFPN', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - in_channels=[256, 512, 1024], - out_channels=256, - norm_cfg=norm_cfg, - act_cfg=dict(type='ReLU', inplace=True), - ), - bbox_head=dict( - type='YOLOXHead', - head_module=dict( - type='YOLOXHeadModule', - num_classes=num_classes, - in_channels=256, - feat_channels=256, - widen_factor=widen_factor, - stacked_convs=2, - featmap_strides=(8, 16, 32), - use_depthwise=False, - norm_cfg=norm_cfg, - act_cfg=dict(type='ReLU', inplace=True), - ), - loss_cls=dict( - type='mmdet.CrossEntropyLoss', - use_sigmoid=True, - reduction='sum', - loss_weight=loss_cls_weight, - ), - loss_bbox=dict( - type='mmdet.IoULoss', - mode='square', - eps=1e-16, - reduction='sum', - loss_weight=loss_bbox_weight, - ), - loss_obj=dict( - type='mmdet.CrossEntropyLoss', - use_sigmoid=True, - reduction='sum', - loss_weight=loss_obj_weight, - ), - loss_bbox_aux=dict(type='mmdet.L1Loss', reduction='sum', loss_weight=loss_bbox_aux_weight), - ), - train_cfg=dict( - assigner=dict( - type='mmdet.SimOTAAssigner', - center_radius=center_radius, - iou_calculator=dict(type='mmdet.BboxOverlaps2D'), - ) - ), - test_cfg=model_test_cfg, -) - -albu_train_transforms = [ - dict(type='Blur', p=0.01), - dict(type='MedianBlur', p=0.01), - dict(type='ToGray', p=0.01), - dict(type='CLAHE', p=0.01), -] - -pre_transform = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), - dict(type='LoadAnnotations', with_bbox=True), -] - -train_pipeline = [ - *pre_transform, - dict(type='Mosaic', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform), - dict( - type='YOLOv5RandomAffine', - max_rotate_degree=0.0, - max_shear_degree=0.0, - scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), - # imgsz is (width, height) - border=(-imgsz[0] // 2, -imgsz[1] // 2), - border_val=(114, 114, 114), - ), - dict( - type='mmdet.Albu', - transforms=albu_train_transforms, - bbox_params=dict( - type='BboxParams', - format='pascal_voc', - label_fields=['gt_bboxes_labels', 'gt_ignore_flags'], - ), - keymap={'img': 'image', 'gt_bboxes': 'bboxes'}, - ), - dict(type='YOLOv5HSVRandomAug'), - # dict(type='mmdet.RandomFlip', prob=0.5), - dict( - type='mmdet.PackDetInputs', - meta_keys=( - 'img_id', - 'img_path', - 'ori_shape', - 'img_shape', - ), - ), -] - -train_dataloader = dict( - batch_size=batch, - num_workers=workers, - persistent_workers=persistent_workers, - pin_memory=True, - sampler=dict(type='DefaultSampler', shuffle=True), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file=train_ann, - data_prefix=dict(img=train_data), - filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=train_pipeline, - ), -) - -test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), - dict(type='YOLOv5KeepRatioResize', scale=imgsz), - dict( - type='LetterResize', - scale=imgsz, - 
allow_scale_up=False, - pad_val=dict(img=114), - ), - dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), - dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor'), - ), -] - -val_dataloader = dict( - batch_size=val_batch, - num_workers=val_workers, - persistent_workers=persistent_workers, - pin_memory=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - test_mode=True, - data_prefix=dict(img=val_data), - ann_file=val_ann, - pipeline=test_pipeline, - batch_shapes_cfg=batch_shapes_cfg, - ), -) - -test_dataloader = val_dataloader - -param_scheduler = None -optim_wrapper = dict( - type='OptimWrapper', - optimizer=dict( - type='SGD', - lr=lr, - momentum=momentum, - weight_decay=weight_decay, - nesterov=True, - batch_size_per_gpu=batch, - ), - constructor='YOLOv5OptimizerConstructor', -) - -default_hooks = dict( - param_scheduler=dict( - type='YOLOv5ParamSchedulerHook', - scheduler_type='linear', - lr_factor=lr_factor, - max_epochs=epochs, - ), - checkpoint=dict( - type='CheckpointHook', - interval=save_interval, - save_best='auto', - max_keep_ckpts=max_keep_ckpts, - ), -) - -custom_hooks = [ - dict( - type='EMAHook', - ema_type='ExpMomentumEMA', - momentum=0.0001, - update_buffers=True, - strict_load=False, - priority=49, - ) -] - -val_evaluator = dict( - type='mmdet.CocoMetric', - proposal_nums=(100, 1, 10), - ann_file=data_root + val_ann, - metric='bbox', -) -test_evaluator = val_evaluator - -train_cfg = dict( - type='EpochBasedTrainLoop', - max_epochs=epochs, - val_interval=val_interval, - _delete_=True, -) -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') diff --git a/requirements/base.txt b/requirements/base.txt index 45ec5d2b..66c012d5 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,5 +1,7 @@ # common albumentations>=1.3.0 +libusb1 + # sensor cbor diff --git a/requirements/inference.txt b/requirements/inference.txt index 11fae067..ef713162 100644 --- a/requirements/inference.txt +++ b/requirements/inference.txt @@ -1,4 +1,5 @@ libusb1>=3.0.0 +pnnx==0.0.4 ncnn>=1.0.20230517 onnx>=1.14.0 onnxmltools>=1.11.2 diff --git a/requirements/mmlab.txt b/requirements/mmlab.txt index 451a9e5e..f5e98891 100644 --- a/requirements/mmlab.txt +++ b/requirements/mmlab.txt @@ -1,6 +1,6 @@ # use openmim to install mmcls>=1.0.0.rc6 -mmcv>=2.0.0 +mmcv<=2.1.0 mmdet>=3.0.0, <3.1.0 # mmyolo currently does not support mmdet 3.1.0 mmengine>=0.8.2 mmpose>=1.0.0 diff --git a/requirements/pytorch_cpu.txt b/requirements/pytorch_cpu.txt index 27f854db..ed22dab4 100644 --- a/requirements/pytorch_cpu.txt +++ b/requirements/pytorch_cpu.txt @@ -1,5 +1,5 @@ # -i https://download.pytorch.org/whl/cpu -torch>=2.0.0 -torchaudio>=2.0.0 -torchvision>=0.15.0 +torch<=2.0.1 +torchaudio<=2.0.2 +torchvision<=0.15.2 diff --git a/requirements/pytorch_cuda.txt b/requirements/pytorch_cuda.txt index 29298aa7..990e36a2 100644 --- a/requirements/pytorch_cuda.txt +++ b/requirements/pytorch_cuda.txt @@ -1,5 +1,5 @@ --i https://download.pytorch.org/whl/cu117 +-i https://download.pytorch.org/whl/cu118 -torch>=2.0.0 -torchaudio>=2.0.0 -torchvision>=0.15.0 +torch<=2.0.1 +torchaudio<=2.0.2 +torchvision<=0.15.2 diff --git a/scripts/test_functional.sh b/scripts/test_functional.sh index afe4de97..d855b428 100755 --- a/scripts/test_functional.sh +++ b/scripts/test_functional.sh @@ -16,7 +16,7 @@ classification_test() # detection case detection_test() { - 
CONFIG_FILE="configs/yolov5/yolov5_tiny_1xb16_300e_coco.py" + CONFIG_FILE="configs/swift_yolo/swift_yolo_tiny_1xb16_300e_coco.py" DATASETS_URL="https://files.seeedstudio.com/sscma/datasets/COCO128.zip" functional_test_core "$1" "${CONFIG_FILE}" "${DATASETS_URL}" diff --git a/sscma/datasets/cocodataset.py b/sscma/datasets/cocodataset.py index 63cc6850..46b25ca6 100644 --- a/sscma/datasets/cocodataset.py +++ b/sscma/datasets/cocodataset.py @@ -1,8 +1,9 @@ import json import os.path as osp -from typing import Optional, Sequence +from typing import Optional, Sequence, List from mmdet.datasets.coco import CocoDataset +from mmengine.fileio import get_local_path from sscma.registry import DATASETS @@ -133,3 +134,45 @@ def __init__( data_root=data_root, **kwargs, ) + + def load_data_list(self) -> List[dict]: + """Load annotations from an annotation file named as ``self.ann_file`` + + Returns: + List[dict]: A list of annotation. + """ # noqa: E501 + + with get_local_path(self.ann_file, backend_args=self.backend_args) as local_path: + self.coco = self.COCOAPI(local_path) + # The order of returned `cat_ids` will not + # change with the order of the `classes` + self.cat_ids = self.coco.get_cat_ids( + cat_names=self.metainfo['classes'] + if len(self.metainfo['classes']) + else [cat['name'] for cat in self.coco.dataset['categories'] if (cat['supercategory'] != "none")], + sup_names=[ + cat['supercategory'] for cat in self.coco.dataset['categories'] if (cat['supercategory'] != "none") + ], + ) + self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)} + self.cat_img_map = self.coco.cat_img_map + + img_ids = self.coco.get_img_ids() + data_list = [] + total_ann_ids = [] + for img_id in img_ids: + raw_img_info = self.coco.load_imgs([img_id])[0] + raw_img_info['img_id'] = img_id + + ann_ids = self.coco.get_ann_ids(img_ids=[img_id]) + raw_ann_info = self.coco.load_anns(ann_ids) + total_ann_ids.extend(ann_ids) + + parsed_data_info = self.parse_data_info({'raw_ann_info': raw_ann_info, 'raw_img_info': raw_img_info}) + data_list.append(parsed_data_info) + if self.ANN_ID_UNIQUE: + assert len(set(total_ann_ids)) == len(total_ann_ids), f"Annotation ids in '{self.ann_file}' are not unique!" + + del self.coco + + return data_list diff --git a/sscma/datasets/transforms/__init__.py b/sscma/datasets/transforms/__init__.py index 797403bb..d9d59dc8 100644 --- a/sscma/datasets/transforms/__init__.py +++ b/sscma/datasets/transforms/__init__.py @@ -1,4 +1,5 @@ from .formatting import PackSensorInputs from .loading import LoadSensorFromFile +from .wrappers import MutiBranchPipe -__all__ = ['PackSensorInputs', 'LoadSensorFromFile'] +__all__ = ['PackSensorInputs', 'LoadSensorFromFile', 'MutiBranchPipe'] diff --git a/sscma/datasets/transforms/wrappers.py b/sscma/datasets/transforms/wrappers.py index 92bc17a1..edea1e4f 100644 --- a/sscma/datasets/transforms/wrappers.py +++ b/sscma/datasets/transforms/wrappers.py @@ -17,7 +17,7 @@ def transform(self, results: Dict) -> Optional[Union[Dict, Tuple[List, List]]]: multi_results[branch] = {'inputs': None, 'data_samples': None} for branch, pipeline in self.branch_pipelines.items(): branch_results = pipeline(copy.deepcopy(results)) - if branch == 'unsup_teacher': + if branch == self.piece_key: results['img'] = branch_results['inputs'].permute(1, 2, 0).cpu().numpy() # If one branch pipeline returns None, # it will sample another data from dataset. 
diff --git a/sscma/models/backbones/__init__.py b/sscma/models/backbones/__init__.py index 2ae3dadf..9bc2d1c9 100644 --- a/sscma/models/backbones/__init__.py +++ b/sscma/models/backbones/__init__.py @@ -3,7 +3,7 @@ from .MobileNetv2 import MobileNetv2 from .MobileNetv3 import MobileNetV3 from .pfld_mobilenet_v2 import PfldMobileNetV2 -from .shufflenetv2 import CustomShuffleNetV2 +from .shufflenetv2 import CustomShuffleNetV2, FastShuffleNetV2 from .ShuffleNetV2 import ShuffleNetV2 from .SoundNet import SoundNetRaw from .SqueezeNet import SqueezeNet @@ -20,4 +20,5 @@ 'EfficientNet', 'MobileNetv2', 'MicroNet', + "FastShuffleNetV2", ] diff --git a/sscma/models/backbones/shufflenetv2.py b/sscma/models/backbones/shufflenetv2.py index d3825f4b..5797ac83 100644 --- a/sscma/models/backbones/shufflenetv2.py +++ b/sscma/models/backbones/shufflenetv2.py @@ -1,12 +1,132 @@ import copy - +from typing import Tuple +import torch import torch.nn as nn from mmcv.cnn import ConvModule +from mmengine.model.base_module import BaseModule from mmpose.models.backbones.shufflenet_v2 import ShuffleNetV2 from sscma.registry import BACKBONES +class ShuffleV2Block(nn.Module): + """ + Reference: https://github.com/dog-qiuqiu/FastestDet/blob/50473cd155cb088aa4a99e64ff6a4b3c24fa07e1/module/shufflenetv2.py#L4 + """ + + def __init__(self, inp, oup, mid_channels, *, ksize, stride) -> None: + super(ShuffleV2Block, self).__init__() + self.stride = stride + assert stride in [1, 2] + + self.mid_channels = mid_channels + self.ksize = ksize + pad = ksize // 2 + self.pad = pad + self.inp = inp + + outputs = oup - inp + + branch_main = [ + # pw + nn.Conv2d(inp, mid_channels, 1, 1, 0, bias=False), + nn.BatchNorm2d(mid_channels), + nn.ReLU(inplace=True), + # dw + nn.Conv2d(mid_channels, mid_channels, ksize, stride, pad, groups=mid_channels, bias=False), + nn.BatchNorm2d(mid_channels), + # pw-linear + nn.Conv2d(mid_channels, outputs, 1, 1, 0, bias=False), + nn.BatchNorm2d(outputs), + nn.ReLU(inplace=True), + ] + self.branch_main = nn.Sequential(*branch_main) + + if stride == 2: + branch_proj = [ + # dw + nn.Conv2d(inp, inp, ksize, stride, pad, groups=inp, bias=False), + nn.BatchNorm2d(inp), + # pw-linear + nn.Conv2d(inp, inp, 1, 1, 0, bias=False), + nn.BatchNorm2d(inp), + nn.ReLU(inplace=True), + ] + self.branch_proj = nn.Sequential(*branch_proj) + else: + self.branch_proj = None + + def forward(self, old_x) -> torch.Tensor: + if self.stride == 1: + x_proj, x = self.channel_shuffle(old_x) + return torch.cat((x_proj, self.branch_main(x)), 1) + elif self.stride == 2: + x_proj = old_x + x = old_x + return torch.cat((self.branch_proj(x_proj), self.branch_main(x)), 1) + + def channel_shuffle(self, x) -> Tuple[torch.Tensor]: + batchsize, num_channels, height, width = x.data.size() + assert num_channels % 4 == 0 + x = x.reshape(batchsize * num_channels // 2, 2, height * width) + x = x.permute(1, 0, 2) + x = x.reshape(2, -1, num_channels // 2, height, width) + return x[0], x[1] + + +@BACKBONES.register_module() +class FastShuffleNetV2(BaseModule): + """ + Reference: https://github.com/dog-qiuqiu/FastestDet/blob/50473cd155cb088aa4a99e64ff6a4b3c24fa07e1/module/shufflenetv2.py#L64C6-L64C7 + """ + + def __init__(self, stage_repeats, stage_out_channels, *args, **kwargs) -> None: + super(FastShuffleNetV2, self).__init__(*args, **kwargs) + + self.stage_repeats = stage_repeats + self.stage_out_channels = stage_out_channels + + # building first layer + input_channel = self.stage_out_channels[1] + self.first_conv = nn.Sequential( + nn.Conv2d(3, 
input_channel, 3, 2, 1, bias=False), + nn.BatchNorm2d(input_channel), + nn.ReLU(inplace=True), + ) + + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + stage_names = ["stage2", "stage3", "stage4"] + for idxstage in range(len(self.stage_repeats)): + numrepeat = self.stage_repeats[idxstage] + output_channel = self.stage_out_channels[idxstage + 2] + stageSeq = [] + for i in range(numrepeat): + if i == 0: + stageSeq.append( + ShuffleV2Block( + input_channel, output_channel, mid_channels=output_channel // 2, ksize=3, stride=2 + ) + ) + else: + stageSeq.append( + ShuffleV2Block( + input_channel // 2, output_channel, mid_channels=output_channel // 2, ksize=3, stride=1 + ) + ) + input_channel = output_channel + setattr(self, stage_names[idxstage], nn.Sequential(*stageSeq)) + + def forward(self, x) -> Tuple[torch.Tensor]: + x = self.first_conv(x) + x = self.maxpool(x) + P1 = self.stage2(x) + P2 = self.stage3(P1) + P3 = self.stage4(P2) + + return (P1, P2, P3) + + @BACKBONES.register_module() class CustomShuffleNetV2(ShuffleNetV2): def __init__( diff --git a/tools/export.py b/tools/export.py index 6facacd4..0bf443b5 100644 --- a/tools/export.py +++ b/tools/export.py @@ -1,17 +1,21 @@ import argparse import os import tempfile - +import sys +import os.path as osp import torch from tqdm import tqdm +current_path = osp.dirname(osp.abspath(__file__)) +sys.path.append(osp.dirname(current_path)) + # TODO: Move to config file import sscma.datasets # noqa import sscma.engine # noqa import sscma.evaluation # noqa import sscma.models # noqa import sscma.visualization # noqa -from sscma.utils.check import check_lib +from sscma.utils.check import check_lib # noqa def parse_args(): @@ -446,7 +450,8 @@ def export_vela(args, model): if args.vela is not None: for key, value in args.vela.items(): vela_args.append('--' + key) - vela_args.append(value) + vela_args.append(str(value)) + vela_main(vela_args) diff --git a/tools/inference.py b/tools/inference.py index 99879c7d..0972c4d4 100644 --- a/tools/inference.py +++ b/tools/inference.py @@ -1,9 +1,13 @@ import argparse import os import tempfile - +import sys +import os.path as osp import torch +current_path = osp.dirname(osp.abspath(__file__)) +sys.path.append(osp.dirname(current_path)) + # TODO: Move to config file import sscma.datasets # noqa import sscma.engine # noqa diff --git a/tools/train.py b/tools/train.py index 84b1703f..18376861 100644 --- a/tools/train.py +++ b/tools/train.py @@ -1,9 +1,13 @@ import argparse import os import tempfile - +import sys +import os.path as osp import torch +current_path = osp.dirname(osp.abspath(__file__)) +sys.path.append(osp.dirname(current_path)) + # TODO: Move to config file import sscma.datasets # noqa import sscma.engine # noqa
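
For reference, a usage sketch of the new FastShuffleNetV2 backbone added above. The stage_repeats/stage_out_channels values below mirror the FastestDet reference implementation linked in the class docstring; they are illustrative assumptions, not values fixed by this patch:

    import torch
    from sscma.models.backbones import FastShuffleNetV2

    # stage_out_channels[0] is unused, [1] feeds first_conv, [2:] feed stages 2-4.
    backbone = FastShuffleNetV2(stage_repeats=[4, 8, 4],
                                stage_out_channels=[-1, 24, 48, 96, 192])

    x = torch.randn(1, 3, 352, 352)
    P1, P2, P3 = backbone(x)  # stride-8/16/32 maps with 48/96/192 channels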