# config/Transformer/nanodet-t.yml

# NanoDet-m with transformer attention
# COCO mAP(0.5:0.95) = 0.217
#             AP_50  = 0.363
#             AP_75  = 0.218
#           AP_small = 0.069
#               AP_m = 0.214
#               AP_l = 0.364

# Workspace path for this configuration.
# NOTE(review): presumably where the trainer writes checkpoints/logs — confirm.
save_dir: 'workspace/nanodet_t'
# Detector definition: one backbone, one fpn stage and one head, each selected
# through its `name` key.
model:
  arch:
    name: 'OneStageDetector'
    backbone:
      name: 'ShuffleNetV2'
      # Quoted so the value is unmistakably a string, not a number.
      model_size: '1.0x'
      out_stages:
        - 2
        - 3
        - 4
      activation: 'LeakyReLU'
    fpn:
      # TAN = transformer attention network.
      name: 'TAN'
      # Three entries, the same count as backbone.out_stages.
      in_channels:
        - 116
        - 232
        - 464
      out_channels: 128
      # Size for position embedding.
      feature_hw:
        - 20
        - 20
      num_heads: 8
      num_encoders: 1
      mlp_ratio: 4
      dropout_ratio: 0.1
      activation: 'LeakyReLU'
    head:
      name: 'NanoDetHead'
      # Equals the number of entries in the top-level class_names list.
      num_classes: 80
      # Same value as fpn.out_channels.
      input_channel: 128
      feat_channels: 128
      stacked_convs: 2
      share_cls_reg: true
      octave_base_scale: 5
      scales_per_octave: 1
      strides:
        - 8
        - 16
        - 32
      reg_max: 7
      norm_cfg:
        type: 'BN'
      # Three loss terms, each with its own weight.
      loss:
        loss_qfl:
          name: 'QualityFocalLoss'
          use_sigmoid: true
          beta: 2.0
          loss_weight: 1.0
        loss_dfl:
          name: 'DistributionFocalLoss'
          loss_weight: 0.25
        loss_bbox:
          name: 'GIoULoss'
          loss_weight: 2.0
# Dataset sources and preprocessing. The train split carries augmentation
# settings; the val split only normalizes.
data:
  train:
    name: 'CocoDataset'
    img_path: 'coco/train2017'
    ann_path: 'coco/annotations/instances_train2017.json'
    # [w, h]
    input_size:
      - 320
      - 320
    keep_ratio: true
    pipeline:
      perspective: 0.0
      scale:
        - 0.6
        - 1.4
      stretch:
        - [1, 1]
        - [1, 1]
      rotation: 0
      shear: 0
      translate: 0.2
      flip: 0.5
      brightness: 0.2
      contrast:
        - 0.8
        - 1.2
      saturation:
        - 0.8
        - 1.2
      # Two rows of three values each.
      # NOTE(review): presumably per-channel mean, then std — confirm against
      # the pipeline code.
      normalize:
        - [103.53, 116.28, 123.675]
        - [57.375, 57.12, 58.395]
  val:
    name: 'CocoDataset'
    img_path: 'coco/val2017'
    ann_path: 'coco/annotations/instances_val2017.json'
    # [w, h]
    input_size:
      - 320
      - 320
    keep_ratio: true
    pipeline:
      # Same values as data.train.pipeline.normalize.
      normalize:
        - [103.53, 116.28, 123.675]
        - [57.375, 57.12, 58.395]
# Hardware settings: GPU ids to use, plus worker and batch counts per GPU.
device:
  gpu_ids:
    - 0
  workers_per_gpu: 8
  batchsize_per_gpu: 160
# Training schedule: optimizer, warmup, epoch count, LR decay and how often
# validation runs.
schedule:
  # The key is present with an explicit null value (a bare `resume:` loads as
  # null too, but reads as if a value were forgotten).
  # NOTE(review): confirm whether the trainer reacts to the mere presence of
  # this key; if it does, comment the line out for runs that must start fresh.
  resume: null
  # load_model: YOUR_MODEL_PATH
  optimizer:
    name: 'SGD'
    lr: 0.14
    momentum: 0.9
    weight_decay: 0.0001
  warmup:
    name: 'linear'
    steps: 500
    ratio: 0.01
  total_epochs: 190
  lr_schedule:
    name: 'MultiStepLR'
    # Every milestone is below total_epochs (190).
    milestones:
      - 140
      - 170
      - 180
      - 185
    gamma: 0.1
  val_intervals: 10
# Evaluation settings.
# NOTE(review): save_key presumably names the metric used to pick the best
# checkpoint — confirm against the trainer.
evaluator:
  name: 'CocoDetectionEvaluator'
  save_key: 'mAP'

# Logging settings.
# NOTE(review): unit of `interval` (iterations vs. epochs) is not visible
# here — confirm.
log:
  interval: 10

# Class labels, one per line; 80 entries, matching model.arch.head.num_classes.
# Order is preserved exactly.
class_names:
  - 'person'
  - 'bicycle'
  - 'car'
  - 'motorcycle'
  - 'airplane'
  - 'bus'
  - 'train'
  - 'truck'
  - 'boat'
  - 'traffic_light'
  - 'fire_hydrant'
  - 'stop_sign'
  - 'parking_meter'
  - 'bench'
  - 'bird'
  - 'cat'
  - 'dog'
  - 'horse'
  - 'sheep'
  - 'cow'
  - 'elephant'
  - 'bear'
  - 'zebra'
  - 'giraffe'
  - 'backpack'
  - 'umbrella'
  - 'handbag'
  - 'tie'
  - 'suitcase'
  - 'frisbee'
  - 'skis'
  - 'snowboard'
  - 'sports_ball'
  - 'kite'
  - 'baseball_bat'
  - 'baseball_glove'
  - 'skateboard'
  - 'surfboard'
  - 'tennis_racket'
  - 'bottle'
  - 'wine_glass'
  - 'cup'
  - 'fork'
  - 'knife'
  - 'spoon'
  - 'bowl'
  - 'banana'
  - 'apple'
  - 'sandwich'
  - 'orange'
  - 'broccoli'
  - 'carrot'
  - 'hot_dog'
  - 'pizza'
  - 'donut'
  - 'cake'
  - 'chair'
  - 'couch'
  - 'potted_plant'
  - 'bed'
  - 'dining_table'
  - 'toilet'
  - 'tv'
  - 'laptop'
  - 'mouse'
  - 'remote'
  - 'keyboard'
  - 'cell_phone'
  - 'microwave'
  - 'oven'
  - 'toaster'
  - 'sink'
  - 'refrigerator'
  - 'book'
  - 'clock'
  - 'vase'
  - 'scissors'
  - 'teddy_bear'
  - 'hair_drier'
  - 'toothbrush'