fix configs and metaformer

Westlake-AI · Jul 12, 2023 · d66ed40 · d66ed40
1 parent cb53749
commit d66ed40
Show file tree

Hide file tree

Showing 35 changed files with 1,350 additions and 32 deletions.
diff --git a/configs/classification/_base_/models/metaformer/identityformer_m36.py b/configs/classification/_base_/models/metaformer/identityformer_m36.py
@@ -0,0 +1,24 @@
+# model settings: MixUpClassification with a MetaFormer 'identityformer_m36' backbone
+model = dict(
+    type='MixUpClassification',
+    pretrained=None,
+    alpha=[0.8, 1.0,],  # presumably one value per mix_mode entry, in order -- confirm
+    mix_mode=["mixup", "cutmix",],
+    mix_args=dict(),
+    backbone=dict(
+        type='MetaFormer',
+        arch='identityformer_m36',
+        drop_path_rate=0.3,
+    ),
+    head=dict(
+        type='MetaFormerClsHead',
+        loss=dict(type='LabelSmoothLoss',
+            label_smooth_val=0.1, num_classes=1000, mode='original', loss_weight=1.0),
+        with_avg_pool=True,
+        head_dropout=0.,
+        in_channels=768, num_classes=1000),  # in_channels presumably must match the backbone output width -- confirm
+    init_cfg=[
+        dict(type='TruncNormal', layer=['Conv2d', 'Linear'], std=0.02, bias=0.),
+        dict(type='Constant', layer=['LayerNorm', 'GroupNorm'], val=1., bias=0.)
+    ],
+)
diff --git a/configs/classification/_base_/models/metaformer/identityformer_s12.py b/configs/classification/_base_/models/metaformer/identityformer_s12.py
@@ -0,0 +1,24 @@
+# model settings: MixUpClassification with a MetaFormer 'identityformer_s12' backbone
+model = dict(
+    type='MixUpClassification',
+    pretrained=None,
+    alpha=[0.8, 1.0,],  # presumably one value per mix_mode entry, in order -- confirm
+    mix_mode=["mixup", "cutmix",],
+    mix_args=dict(),
+    backbone=dict(
+        type='MetaFormer',
+        arch='identityformer_s12',
+        drop_path_rate=0.1,
+    ),
+    head=dict(
+        type='MetaFormerClsHead',
+        loss=dict(type='LabelSmoothLoss',
+            label_smooth_val=0.1, num_classes=1000, mode='original', loss_weight=1.0),
+        with_avg_pool=True,
+        head_dropout=0.,
+        in_channels=512, num_classes=1000),  # in_channels presumably must match the backbone output width -- confirm
+    init_cfg=[
+        dict(type='TruncNormal', layer=['Conv2d', 'Linear'], std=0.02, bias=0.),
+        dict(type='Constant', layer=['LayerNorm', 'GroupNorm'], val=1., bias=0.)
+    ],
+)
diff --git a/configs/classification/_base_/models/metaformer/poolformerv2_m36.py b/configs/classification/_base_/models/metaformer/poolformerv2_m36.py
@@ -0,0 +1,24 @@
+# model settings: MixUpClassification with a MetaFormer 'poolformerv2_m36' backbone
+model = dict(
+    type='MixUpClassification',
+    pretrained=None,
+    alpha=[0.8, 1.0,],  # presumably one value per mix_mode entry, in order -- confirm
+    mix_mode=["mixup", "cutmix",],
+    mix_args=dict(),
+    backbone=dict(
+        type='MetaFormer',
+        arch='poolformerv2_m36',
+        drop_path_rate=0.3,
+    ),
+    head=dict(
+        type='MetaFormerClsHead',
+        loss=dict(type='LabelSmoothLoss',
+            label_smooth_val=0.1, num_classes=1000, mode='original', loss_weight=1.0),
+        with_avg_pool=True,
+        head_dropout=0.,
+        in_channels=768, num_classes=1000),  # in_channels presumably must match the backbone output width -- confirm
+    init_cfg=[
+        dict(type='TruncNormal', layer=['Conv2d', 'Linear'], std=0.02, bias=0.),
+        dict(type='Constant', layer=['LayerNorm', 'GroupNorm'], val=1., bias=0.)
+    ],
+)
diff --git a/configs/classification/_base_/models/metaformer/poolformerv2_s12.py b/configs/classification/_base_/models/metaformer/poolformerv2_s12.py
@@ -0,0 +1,24 @@
+# model settings: MixUpClassification with a MetaFormer 'poolformerv2_s12' backbone
+model = dict(
+    type='MixUpClassification',
+    pretrained=None,
+    alpha=[0.8, 1.0,],  # presumably one value per mix_mode entry, in order -- confirm
+    mix_mode=["mixup", "cutmix",],
+    mix_args=dict(),
+    backbone=dict(
+        type='MetaFormer',
+        arch='poolformerv2_s12',
+        drop_path_rate=0.1,
+    ),
+    head=dict(
+        type='MetaFormerClsHead',
+        loss=dict(type='LabelSmoothLoss',
+            label_smooth_val=0.1, num_classes=1000, mode='original', loss_weight=1.0),
+        with_avg_pool=True,
+        head_dropout=0.,
+        in_channels=512, num_classes=1000),  # in_channels presumably must match the backbone output width -- confirm
+    init_cfg=[
+        dict(type='TruncNormal', layer=['Conv2d', 'Linear'], std=0.02, bias=0.),
+        dict(type='Constant', layer=['LayerNorm', 'GroupNorm'], val=1., bias=0.)
+    ],
+)
diff --git a/configs/classification/_base_/models/metaformer/randformer_m36.py b/configs/classification/_base_/models/metaformer/randformer_m36.py
@@ -0,0 +1,24 @@
+# model settings: MixUpClassification with a MetaFormer 'randformer_m36' backbone
+model = dict(
+    type='MixUpClassification',
+    pretrained=None,
+    alpha=[0.8, 1.0,],  # presumably one value per mix_mode entry, in order -- confirm
+    mix_mode=["mixup", "cutmix",],
+    mix_args=dict(),
+    backbone=dict(
+        type='MetaFormer',
+        arch='randformer_m36',
+        drop_path_rate=0.15,
+    ),
+    head=dict(
+        type='MetaFormerClsHead',
+        loss=dict(type='LabelSmoothLoss',
+            label_smooth_val=0.1, num_classes=1000, mode='original', loss_weight=1.0),
+        with_avg_pool=True,
+        head_dropout=0.,
+        in_channels=768, num_classes=1000),  # in_channels presumably must match the backbone output width -- confirm
+    init_cfg=[
+        dict(type='TruncNormal', layer=['Conv2d', 'Linear'], std=0.02, bias=0.),
+        dict(type='Constant', layer=['LayerNorm', 'GroupNorm'], val=1., bias=0.)
+    ],
+)
diff --git a/configs/classification/_base_/models/metaformer/randformer_s12.py b/configs/classification/_base_/models/metaformer/randformer_s12.py
@@ -0,0 +1,24 @@
+# model settings: MixUpClassification with a MetaFormer 'randformer_s12' backbone
+model = dict(
+    type='MixUpClassification',
+    pretrained=None,
+    alpha=[0.8, 1.0,],  # presumably one value per mix_mode entry, in order -- confirm
+    mix_mode=["mixup", "cutmix",],
+    mix_args=dict(),
+    backbone=dict(
+        type='MetaFormer',
+        arch='randformer_s12',
+        drop_path_rate=0.1,
+    ),
+    head=dict(
+        type='MetaFormerClsHead',
+        loss=dict(type='LabelSmoothLoss',
+            label_smooth_val=0.1, num_classes=1000, mode='original', loss_weight=1.0),
+        with_avg_pool=True,
+        head_dropout=0.,
+        in_channels=512, num_classes=1000),  # in_channels presumably must match the backbone output width -- confirm
+    init_cfg=[
+        dict(type='TruncNormal', layer=['Conv2d', 'Linear'], std=0.02, bias=0.),
+        dict(type='Constant', layer=['LayerNorm', 'GroupNorm'], val=1., bias=0.)
+    ],
+)
diff --git a/configs/classification/cifar100/README.md b/configs/classification/cifar100/README.md
@@ -97,8 +97,8 @@ We summarize mixup benchmarks in [Model Zoo](https://github.com/Westlake-AI/open
 | PuzzleMix     |     2    |    73.60    |    80.33   |
 | ResizeMix*    |     1    |    68.45    |    80.16   |
 | TransMix      |   0.8,1  |    76.17    |      -     |
-| AutoMix       |     2    |    74.87    |    82.67   |
-| SAMix*        |     2    |             |            |
+| AutoMix       |     2    |    76.24    |    82.67   |
+| SAMix*        |     2    |    77.94    |            |
 
 ## Citation
 

diff --git a/configs/classification/imagenet/metaformer/README.md b/configs/classification/imagenet/metaformer/README.md
@@ -16,24 +16,46 @@ This page is based on the [official repo](https://github.com/sail-sg/metaformer)
 
 ### ImageNet-1k
 
-| Model | Resolution | Params | MACs | Top1 Acc | Download |
-| :---     |   :---:    |  :---: |  :---:  |  :---:  |  :---:  |
-| caformer_s18 | 224 | 26M | 4.1G |  83.6 | [here](https://huggingface.co/sail/dl/resolve/main/caformer/caformer_s18.pth) |
-| caformer_s18_384 | 384 | 26M | 13.4G |  85.0 | [here](https://huggingface.co/sail/dl/resolve/main/caformer/caformer_s18_384.pth) |
-| caformer_s36 | 224 | 39M | 8.0G |  84.5 | [here](https://huggingface.co/sail/dl/resolve/main/caformer/caformer_s36.pth) |
-| caformer_s36_384 | 384 | 39M | 26.0G |  85.7 | [here](https://huggingface.co/sail/dl/resolve/main/caformer/caformer_s36_384.pth) |
-| caformer_m36 | 224 | 56M | 13.2G |  85.2 | [here](https://huggingface.co/sail/dl/resolve/main/caformer/caformer_m36.pth) |
-| caformer_m36_384 | 384 | 56M | 42.0G |  86.2 | [here](https://huggingface.co/sail/dl/resolve/main/caformer/caformer_m36_384.pth) |
-| caformer_b36 | 224 | 99M | 23.2G |  **85.5**\* | [here](https://huggingface.co/sail/dl/resolve/main/caformer/caformer_b36.pth) |
-| caformer_b36_384 | 384 | 99M | 72.2G |  **86.4** | [here](https://huggingface.co/sail/dl/resolve/main/caformer/caformer_b36_384.pth) |
-| convformer_s18 | 224 | 27M | 3.9G |  83.0 | [here](https://huggingface.co/sail/dl/resolve/main/convformer/convformer_s18.pth) |
-| convformer_s18_384 | 384 | 27M | 11.6G |  84.4 | [here](https://huggingface.co/sail/dl/resolve/main/convformer/convformer_s18_384.pth) |
-| convformer_s36 | 224 | 40M | 7.6G |  84.1 | [here](https://huggingface.co/sail/dl/resolve/main/convformer/convformer_s36.pth) |
-| convformer_s36_384 | 384 | 40M | 22.4G |  85.4 | [here](https://huggingface.co/sail/dl/resolve/main/convformer/convformer_s36_384.pth) |
-| convformer_m36 | 224 | 57M | 12.8G |  84.5 | [here](https://huggingface.co/sail/dl/resolve/main/convformer/convformer_m36.pth) |
-| convformer_m36_384 | 384 | 57M | 37.7G |  85.6 | [here](https://huggingface.co/sail/dl/resolve/main/convformer/convformer_m36_384.pth) |
-| convformer_b36 | 224 | 100M | 22.6G |  84.8 | [here](https://huggingface.co/sail/dl/resolve/main/convformer/convformer_b36.pth) |
-| convformer_b36_384 | 384 | 100M | 66.5G |  85.7 | [here](https://huggingface.co/sail/dl/resolve/main/convformer/convformer_b36_384.pth) |
+#### Models with Common Token Mixers
+
+| Model | Resolution | Params | MACs  | Top1 Acc | Download |
+| :---: | :--------: | :----: | :---: | :------: | :------: |
+| caformer_s18\* | 224 | 26M | 4.1G |  83.6 | [here](https://huggingface.co/sail/dl/resolve/main/caformer/caformer_s18.pth) |
+| caformer_s18_384\* | 384 | 26M | 13.4G |  85.0 | [here](https://huggingface.co/sail/dl/resolve/main/caformer/caformer_s18_384.pth) |
+| caformer_s36\* | 224 | 39M | 8.0G |  84.5 | [here](https://huggingface.co/sail/dl/resolve/main/caformer/caformer_s36.pth) |
+| caformer_s36_384\* | 384 | 39M | 26.0G |  85.7 | [here](https://huggingface.co/sail/dl/resolve/main/caformer/caformer_s36_384.pth) |
+| caformer_m36\* | 224 | 56M | 13.2G |  85.2 | [here](https://huggingface.co/sail/dl/resolve/main/caformer/caformer_m36.pth) |
+| caformer_m36_384\* | 384 | 56M | 42.0G |  86.2 | [here](https://huggingface.co/sail/dl/resolve/main/caformer/caformer_m36_384.pth) |
+| caformer_b36\* | 224 | 99M | 23.2G |  **85.5**\* | [here](https://huggingface.co/sail/dl/resolve/main/caformer/caformer_b36.pth) |
+| caformer_b36_384\* | 384 | 99M | 72.2G |  **86.4** | [here](https://huggingface.co/sail/dl/resolve/main/caformer/caformer_b36_384.pth) |
+| convformer_s18\* | 224 | 27M | 3.9G |  83.0 | [here](https://huggingface.co/sail/dl/resolve/main/convformer/convformer_s18.pth) |
+| convformer_s18_384\* | 384 | 27M | 11.6G |  84.4 | [here](https://huggingface.co/sail/dl/resolve/main/convformer/convformer_s18_384.pth) |
+| convformer_s36\* | 224 | 40M | 7.6G |  84.1 | [here](https://huggingface.co/sail/dl/resolve/main/convformer/convformer_s36.pth) |
+| convformer_s36_384\* | 384 | 40M | 22.4G |  85.4 | [here](https://huggingface.co/sail/dl/resolve/main/convformer/convformer_s36_384.pth) |
+| convformer_m36\* | 224 | 57M | 12.8G |  84.5 | [here](https://huggingface.co/sail/dl/resolve/main/convformer/convformer_m36.pth) |
+| convformer_m36_384\* | 384 | 57M | 37.7G |  85.6 | [here](https://huggingface.co/sail/dl/resolve/main/convformer/convformer_m36_384.pth) |
+| convformer_b36\* | 224 | 100M | 22.6G |  84.8 | [here](https://huggingface.co/sail/dl/resolve/main/convformer/convformer_b36.pth) |
+| convformer_b36_384\* | 384 | 100M | 66.5G |  85.7 | [here](https://huggingface.co/sail/dl/resolve/main/convformer/convformer_b36_384.pth) |
+
+#### Models with Basic Token Mixers
+
+| Model | Resolution | Params | MACs  | Top1 Acc | Download |
+| :---: | :--------: | :----: | :---: | :------: | :------: |
+| identityformer_s12\* | 224 | 11.9M | 1.8G |  74.6 | [here](https://huggingface.co/sail/dl/resolve/main/identityformer/identityformer_s12.pth) |
+| identityformer_s24\* | 224 | 21.3M | 3.4G |  78.2 | [here](https://huggingface.co/sail/dl/resolve/main/identityformer/identityformer_s24.pth) |
+| identityformer_s36\* | 224 | 30.8M | 5.0G |  79.3 | [here](https://huggingface.co/sail/dl/resolve/main/identityformer/identityformer_s36.pth) |
+| identityformer_m36\* | 224 | 56.1M | 8.8G |  80.0 | [here](https://huggingface.co/sail/dl/resolve/main/identityformer/identityformer_m36.pth) |
+| identityformer_m48\* | 224 | 73.3M | 11.5G |  80.4 | [here](https://huggingface.co/sail/dl/resolve/main/identityformer/identityformer_m48.pth) |
+| randformer_s12\* | 224 | 11.9 + <ins>0.2</ins>M | 1.9G |  76.6 | [here](https://huggingface.co/sail/dl/resolve/main/randformer/randformer_s12.pth) |
+| randformer_s24\* | 224 | 21.3 + <ins>0.5</ins>M | 3.5G |  78.2 | [here](https://huggingface.co/sail/dl/resolve/main/randformer/randformer_s24.pth) |
+| randformer_s36\* | 224 | 30.8 + <ins>0.7</ins>M | 5.2G |  79.5 | [here](https://huggingface.co/sail/dl/resolve/main/randformer/randformer_s36.pth) |
+| randformer_m36\* | 224 | 56.1 + <ins>0.7</ins>M | 9.0G |  81.2 | [here](https://huggingface.co/sail/dl/resolve/main/randformer/randformer_m36.pth) |
+| randformer_m48\* | 224 | 73.3 + <ins>0.9</ins>M | 11.9G |  81.4 | [here](https://huggingface.co/sail/dl/resolve/main/randformer/randformer_m48.pth) |
+| poolformerv2_s12\* | 224 | 11.9M | 1.8G |  78.0 | [here](https://huggingface.co/sail/dl/resolve/main/poolformerv2/poolformerv2_s12.pth) |
+| poolformerv2_s24\* | 224 | 21.3M | 3.4G |  80.7 | [here](https://huggingface.co/sail/dl/resolve/main/poolformerv2/poolformerv2_s24.pth) |
+| poolformerv2_s36\* | 224 | 30.8M | 5.0G |  81.6 | [here](https://huggingface.co/sail/dl/resolve/main/poolformerv2/poolformerv2_s36.pth) |
+| poolformerv2_m36\* | 224 | 56.1M | 8.8G |  82.2 | [here](https://huggingface.co/sail/dl/resolve/main/poolformerv2/poolformerv2_m36.pth) |
+| poolformerv2_m48\* | 224 | 73.3M | 11.5G |  82.6 | [here](https://huggingface.co/sail/dl/resolve/main/poolformerv2/poolformerv2_m48.pth) |
 
 We mainly follow the original training setting provided by the [official repo](https://github.com/sail-sg/metaformer) to construct config files. *Models with * are converted from the [official repo](https://github.com/sail-sg/metaformer).*
 

diff --git a/configs/classification/imagenet/metaformer/identityformer_m36_8xb128_ac4_ep300.py b/configs/classification/imagenet/metaformer/identityformer_m36_8xb128_ac4_ep300.py
@@ -0,0 +1,42 @@
+_base_ = [
+    '../../_base_/models/metaformer/identityformer_m36.py',
+    '../../_base_/datasets/imagenet/poolformer_m_sz224_8xbs128.py',
+    '../../_base_/default_runtime.py',
+]
+
+# data
+data = dict(imgs_per_gpu=128, workers_per_gpu=8)
+
+# additional hooks
+update_interval = 4  # total: 8 x bs128 x 4 accumulates = bs4096
+
+# optimizer
+optimizer = dict(
+    type='AdamW',
+    lr=4e-3,  # lr / bs4096
+    weight_decay=0.05, eps=1e-8, betas=(0.9, 0.999),
+    paramwise_options={
+        '(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.),
+        'norm': dict(weight_decay=0.),
+        'bias': dict(weight_decay=0.),
+        'layer_scale': dict(weight_decay=0.),
+        'res_scale': dict(weight_decay=0.),
+    })
+
+# fp16
+use_fp16 = False
+fp16 = dict(type='mmcv', loss_scale='dynamic')
+optimizer_config = dict(
+    grad_clip=dict(max_norm=5.0), update_interval=update_interval)
+
+# lr scheduler
+lr_config = dict(
+    policy='CosineAnnealing',
+    by_epoch=False, min_lr=1e-6,
+    warmup='linear',
+    warmup_iters=5, warmup_by_epoch=True,
+    warmup_ratio=1e-6,
+)
+
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=300)
diff --git a/configs/classification/imagenet/metaformer/identityformer_m48_8xb128_ac4_ep300.py b/configs/classification/imagenet/metaformer/identityformer_m48_8xb128_ac4_ep300.py
@@ -0,0 +1,48 @@
+_base_ = [
+    '../../_base_/models/metaformer/identityformer_m36.py',
+    '../../_base_/datasets/imagenet/poolformer_m_sz224_8xbs128.py',
+    '../../_base_/default_runtime.py',
+]
+
+# model settings
+model = dict(
+    backbone=dict(
+        arch='identityformer_m48', drop_path_rate=0.4),
+)
+
+# data
+data = dict(imgs_per_gpu=128, workers_per_gpu=8)
+
+# additional hooks
+update_interval = 4  # total: 8 x bs128 x 4 accumulates = bs4096
+
+# optimizer
+optimizer = dict(
+    type='AdamW',
+    lr=4e-3,  # lr / bs4096
+    weight_decay=0.05, eps=1e-8, betas=(0.9, 0.999),
+    paramwise_options={
+        '(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.),
+        'norm': dict(weight_decay=0.),
+        'bias': dict(weight_decay=0.),
+        'layer_scale': dict(weight_decay=0.),
+        'res_scale': dict(weight_decay=0.),
+    })
+
+# fp16
+use_fp16 = False
+fp16 = dict(type='mmcv', loss_scale='dynamic')
+optimizer_config = dict(
+    grad_clip=dict(max_norm=5.0), update_interval=update_interval)
+
+# lr scheduler
+lr_config = dict(
+    policy='CosineAnnealing',
+    by_epoch=False, min_lr=1e-6,
+    warmup='linear',
+    warmup_iters=5, warmup_by_epoch=True,
+    warmup_ratio=1e-6,
+)
+
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=300)
diff --git a/configs/classification/imagenet/metaformer/identityformer_s12_8xb128_ac4_ep300.py b/configs/classification/imagenet/metaformer/identityformer_s12_8xb128_ac4_ep300.py
@@ -0,0 +1,42 @@
+_base_ = [
+    '../../_base_/models/metaformer/identityformer_s12.py',
+    '../../_base_/datasets/imagenet/poolformer_m_sz224_8xbs128.py',
+    '../../_base_/default_runtime.py',
+]
+
+# data
+data = dict(imgs_per_gpu=128, workers_per_gpu=8)
+
+# additional hooks
+update_interval = 4  # total: 8 x bs128 x 4 accumulates = bs4096
+
+# optimizer
+optimizer = dict(
+    type='AdamW',
+    lr=4e-3,  # lr / bs4096
+    weight_decay=0.05, eps=1e-8, betas=(0.9, 0.999),
+    paramwise_options={
+        '(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.),
+        'norm': dict(weight_decay=0.),
+        'bias': dict(weight_decay=0.),
+        'layer_scale': dict(weight_decay=0.),
+        'res_scale': dict(weight_decay=0.),
+    })
+
+# fp16
+use_fp16 = False
+fp16 = dict(type='mmcv', loss_scale='dynamic')
+optimizer_config = dict(
+    grad_clip=dict(max_norm=5.0), update_interval=update_interval)
+
+# lr scheduler
+lr_config = dict(
+    policy='CosineAnnealing',
+    by_epoch=False, min_lr=1e-6,
+    warmup='linear',
+    warmup_iters=5, warmup_by_epoch=True,
+    warmup_ratio=1e-6,
+)
+
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=300)