
Commit

update and release cifar100 mixup
Lupin1998 committed Jul 18, 2023
1 parent d66ed40 commit 9d4a81d
Showing 28 changed files with 885 additions and 66 deletions.
4 changes: 2 additions & 2 deletions configs/classification/cifar100/README.md
@@ -80,7 +80,7 @@ We summarize mixup benchmarks in [Model Zoo](https://github.com/Westlake-AI/open
**Setup**

* Since the original resolution of CIFAR-100 is too small for ViTs, we resize the input images to $224\times 224$ (for both training and testing) without modifying the ViT architectures. This benchmark uses the DeiT setup and trains the models for 200 epochs with a batch size of 100 on CIFAR-100. The basic learning rates of DeiT and Swin are $1e-3$ and $5e-4$, which are the optimal setups in our experiments. We search and report $\alpha$ in $Beta(\alpha, \alpha)$ for all compared methods (a minimal mixing sketch follows this list). View config files in [mixups/vits](https://github.com/Westlake-AI/openmixup/tree/main/configs/classification/cifar100/mixups/vits/).
-* The **best** of top-1 accuracy in the last 10 training epochs is reported for ViT architectures. Notice that 📖 denotes original results reproduced by official implementations.
+* The **best** of top-1 accuracy in the last 10 training epochs is reported for ViT architectures. Notice that 📖 denotes original results reproduced by official implementations. We released the trained models and logs in [vits-mix-cifar100-weights](https://github.com/Westlake-AI/openmixup/releases/tag/vits-mix-cifar100-weights).
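As a quick illustration of the $Beta(\alpha, \alpha)$ sampling referenced in the setup, here is a minimal, framework-agnostic mixup sketch; the batch layout and the random-permutation pairing are assumptions for illustration, not OpenMixup's exact implementation:

```python
import numpy as np

def mixup_batch(images, labels, alpha=0.8):
    """Minimal mixup sketch: blend each sample with a random partner."""
    lam = np.random.beta(alpha, alpha)          # mixing ratio from Beta(a, a)
    perm = np.random.permutation(len(images))   # random pairing within the batch
    mixed = lam * images + (1 - lam) * images[perm]
    # the loss is then lam * CE(mixed, labels) + (1 - lam) * CE(mixed, labels[perm])
    return mixed, labels, labels[perm], lam
```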

| Backbones | $Beta$ | DEiT-S(/16) | Swin-T |
|---------------|:--------:|:-----------:|:----------:|
@@ -98,7 +98,7 @@ We summarize mixup benchmarks in [Model Zoo](https://github.com/Westlake-AI/open
| ResizeMix* | 1 | 68.45 | 80.16 |
| TransMix | 0.8,1 | 76.17 | - |
| AutoMix | 2 | 76.24 | 82.67 |
-| SAMix* | 2 | 77.94 | |
+| SAMix* | 2 | 77.94 | 82.62 |

## Citation

@@ -36,13 +36,13 @@
head_one=dict(
type='VisionTransformerClsHead', # mixup CE + label smooth
loss=dict(type='LabelSmoothLoss',
-label_smooth_val=0.1, num_classes=1000, mode='original', loss_weight=1.0),
-in_channels=384, num_classes=1000),
+label_smooth_val=0.1, num_classes=100, mode='original', loss_weight=1.0),
+in_channels=384, num_classes=100),
head_mix=dict(
type='VisionTransformerClsHead', # mixup CE + label smooth
loss=dict(type='LabelSmoothLoss',
-label_smooth_val=0.1, num_classes=1000, mode='original', loss_weight=1.0),
-in_channels=384, num_classes=1000),
+label_smooth_val=0.1, num_classes=100, mode='original', loss_weight=1.0),
+in_channels=384, num_classes=100),
head_weights=dict(
decent_weight=[], accent_weight=[],
head_mix_q=1, head_one_q=1, head_mix_k=1, head_one_k=1),
@@ -1,5 +1,5 @@
_base_ = [
-'../../../_base_/datasets/cifar100/sz224_swin_bs100.py',
+'../../../_base_/datasets/cifar100/sz224_randaug_bs100.py',
'../../../_base_/default_runtime.py',
]

@@ -0,0 +1,76 @@
_base_ = [
'../../../_base_/datasets/cifar100/sz32_randaug_bs100.py',
'../../../_base_/default_runtime.py',
]

# model settings
model = dict(
type='MixUpClassification',
pretrained=None,
alpha=[1, 0.8],
mix_mode=['cutmix', 'mixup'],
mix_args=dict(
alignmix=dict(eps=0.1, max_iter=100),
attentivemix=dict(grid_size=32, top_k=None, beta=8), # AttentiveMix+ in this repo (use pre-trained)
automix=dict(mask_adjust=0, lam_margin=0), # require pre-trained mixblock
fmix=dict(decay_power=3, size=(32,32), max_soft=0., reformulate=False),
gridmix=dict(n_holes=(2, 6), hole_aspect_ratio=1.,
cut_area_ratio=(0.5, 1), cut_aspect_ratio=(0.5, 2)),
manifoldmix=dict(layer=(0, 3)),
puzzlemix=dict(transport=True, t_batch_size=None, t_size=4, # t_size for small-scale datasets
block_num=5, beta=1.2, gamma=0.5, eta=0.2, neigh_size=4, n_labels=3, t_eps=0.8),
resizemix=dict(scope=(0.1, 0.8), use_alpha=True),
samix=dict(mask_adjust=0, lam_margin=0.08), # require pre-trained mixblock
transmix=dict(mix_mode="cutmix"),
),
backbone=dict(
type='ConvNeXt_CIFAR',
arch='tiny',
out_indices=(3,), # x-1: stage-x
act_cfg=dict(type='GELU'),
drop_path_rate=0.3,
gap_before_final_norm=True,
),
head=dict(
type='ClsMixupHead', # mixup CE + label smooth
loss=dict(type='LabelSmoothLoss',
label_smooth_val=0.1, num_classes=100, mode='original', loss_weight=1.0),
with_avg_pool=False,
in_channels=768, num_classes=100),
init_cfg=[
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.),
dict(type='Constant', layer=['LayerNorm', 'BatchNorm'], val=1., bias=0.)
],
)

# optimizer
optimizer = dict(
type='AdamW',
lr=1e-3,
weight_decay=0.05, eps=1e-8, betas=(0.9, 0.999),
paramwise_options={
r'(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.),
'norm': dict(weight_decay=0.),
'bias': dict(weight_decay=0.),
'gamma': dict(weight_decay=0.),
})

# interval for accumulate gradient
update_interval = 1 # total: 1 x bs100 x 1 accumulates = bs100

# fp16
use_fp16 = True
fp16 = dict(type='mmcv', loss_scale='dynamic')
optimizer_config = dict(grad_clip=None, update_interval=update_interval)

# learning policy
lr_config = dict(
policy='CosineAnnealing',
by_epoch=False, min_lr=1e-6,
warmup='linear',
warmup_iters=20, warmup_by_epoch=True,
warmup_ratio=1e-5,
)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=200)
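Since `alpha` and `mix_mode` are lists above, a plausible reading is that one augmentation (with its matching $\alpha$) is sampled per training iteration. A hedged sketch of how such a pair of lists might be consumed; the function and sampling policy are illustrative, not OpenMixup's internals:

```python
import random
import numpy as np

mix_mode = ['cutmix', 'mixup']   # from the config above
alpha = [1, 0.8]                 # matching Beta(a, a) parameters

def sample_mix_settings():
    """Pick one augmentation and its mixing ratio for the current iteration."""
    idx = random.randrange(len(mix_mode))
    lam = np.random.beta(alpha[idx], alpha[idx])
    return mix_mode[idx], lam
```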
@@ -0,0 +1,18 @@
_base_ = "../convnext_t_mixups_bs100.py"

# model settings
model = dict(
pretrained=None,
pretrained_k="torchvision://resnet50",
alpha=2, # float or list
mix_mode="attentivemix",
backbone_k=dict( # PyTorch pre-trained R-50 is required for attentivemix+
type='ResNet',
depth=50,
num_stages=4,
out_indices=(3,),
style='pytorch'),
)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=200)
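This config plugs a frozen, torchvision-pretrained ResNet-50 in as `backbone_k` so its feature maps can score image regions. Below is a hedged sketch of the AttentiveMix+ idea (saliency-guided pasting); the function, its signature, and the fixed `top_k` are illustrative assumptions, not the repo's implementation:

```python
import torch
import torch.nn.functional as F

def attentive_mix(x_a, x_b, feat_b, grid_size=32, top_k=8):
    """Paste the most salient grid cells of x_b onto x_a (illustrative)."""
    # feat_b: (N, C, h, w) feature map of x_b from the frozen backbone_k
    attn = feat_b.mean(dim=1, keepdim=True)                 # channel-mean saliency
    attn = F.interpolate(attn, size=(grid_size, grid_size),
                         mode='bilinear', align_corners=False).flatten(1)
    idx = attn.topk(top_k, dim=1).indices                   # top-k salient cells
    mask = torch.zeros_like(attn).scatter_(1, idx, 1.)
    mask = mask.view(-1, 1, grid_size, grid_size)
    mask = F.interpolate(mask, size=x_a.shape[2:], mode='nearest')
    mixed = x_a * (1. - mask) + x_b * mask                  # saliency-guided paste
    lam = mask.mean(dim=(1, 2, 3))                          # per-sample label weight of x_b
    return mixed, lam
```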
@@ -0,0 +1,120 @@
_base_ = [
'../../../../_base_/datasets/cifar100/sz32_randaug_bs100.py',
'../../../../_base_/default_runtime.py',
]

# model settings
model = dict(
type='AutoMixup',
pretrained=None,
alpha=2.0,
momentum=0.999,
mask_layer=2, # downsampling to 1/16
mask_loss=0.1, # using loss
mask_adjust=0, # none for large datasets
lam_margin=0.08,
switch_off=1.0, # switch off mixblock (fixed)
mask_up_override=None,
debug=True,
backbone=dict(
type='ConvNeXt_CIFAR',
arch='tiny',
out_indices=(2, 3), # x-1: stage-x
act_cfg=dict(type='GELU'),
drop_path_rate=0.3,
gap_before_final_norm=True,
),
mix_block=dict( # AutoMix
type='PixelMixBlock',
in_channels=384, reduction=2, use_scale=True,
unsampling_mode=['nearest',], # str or list, train & test MixBlock, 'nearest' for AutoMix
lam_concat=True, lam_concat_v=False, # AutoMix.V1: lam cat q,k,v
lam_mul=False, lam_residual=False, lam_mul_k=-1, # SAMix lam: none
x_qk_concat=False, x_v_concat=False, # SAMix x concat: none
att_norm_cfg=None, # AutoMix: attention norm for fp16
mask_loss_mode="L1", mask_loss_margin=0.1, # L1 loss, 0.1
frozen=False),
head_one=dict(
type='VisionTransformerClsHead', # mixup CE + label smooth
loss=dict(type='LabelSmoothLoss',
label_smooth_val=0.1, num_classes=100, mode='original', loss_weight=1.0),
with_avg_pool=False,
in_channels=768, num_classes=100),
head_mix=dict(
type='VisionTransformerClsHead', # mixup CE + label smooth
loss=dict(type='LabelSmoothLoss',
label_smooth_val=0.1, num_classes=100, mode='original', loss_weight=1.0),
with_avg_pool=False,
in_channels=768, num_classes=100),
head_weights=dict(
decent_weight=[], accent_weight=[],
head_mix_q=1, head_one_q=1, head_mix_k=1, head_one_k=1),
init_cfg=[
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.),
dict(type='Constant', layer='LayerNorm', val=1., bias=0.)
],
)

# interval for accumulate gradient
update_interval = 1 # total: 1 x bs100 x 1 accumulates = bs100

custom_hooks = [
dict(type='SAVEHook',
save_interval=500 * 20, # 20 ep
iter_per_epoch=500,
),
dict(type='CustomCosineAnnealingHook', # 0.1 to 0
attr_name="mask_loss", attr_base=0.1, min_attr=0., by_epoch=False, # by iter
update_interval=update_interval,
),
dict(type='CosineScheduleHook',
end_momentum=0.99996, # 0.999 to 0.99996
adjust_scope=[0.25, 1.0],
warming_up="constant",
update_interval=update_interval,
interval=1)
]

# optimizer
optimizer = dict(
type='AdamW',
lr=1e-3,
weight_decay=0.05, eps=1e-8, betas=(0.9, 0.999),
paramwise_options={
r'(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.),
'norm': dict(weight_decay=0.),
'bias': dict(weight_decay=0.),
'gamma': dict(weight_decay=0.),
'mix_block': dict(lr=1e-3),
})
# # Sets `find_unused_parameters`: randomly switch off mixblock
# find_unused_parameters = True

# fp16
use_fp16 = False
fp16 = dict(type='mmcv', loss_scale='dynamic')
optimizer_config = dict(grad_clip=None, update_interval=update_interval)

# lr scheduler: Swin for DeiT
lr_config = dict(
policy='CosineAnnealing',
by_epoch=False, min_lr=1e-4,
warmup='linear',
warmup_iters=20, warmup_by_epoch=True,
warmup_ratio=1e-5,
)

# additional scheduler
addtional_scheduler = dict(
policy='CosineAnnealing',
by_epoch=False, min_lr=1e-4, # 0.1 x lr
paramwise_options=['mix_block'],
warmup_iters=20, warmup_by_epoch=True,
warmup_ratio=1e-5,
)

# validation hook
evaluation = dict(initial=False, save_best=None)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=200)
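The `CosineScheduleHook` above ramps the EMA teacher momentum from 0.999 to 0.99996 over the last 75% of training (`adjust_scope=[0.25, 1.0]`, with a constant warm-up before that). A hedged sketch of that schedule; this is one plausible reading of the hook's parameters, not its exact code:

```python
import math

def momentum_at(step, max_steps, base_m=0.999, end_m=0.99996,
                adjust_scope=(0.25, 1.0)):
    """Cosine ramp of the EMA momentum over the tail of training."""
    start = int(adjust_scope[0] * max_steps)
    if step < start:
        return base_m                      # 'constant' warm-up region
    t = (step - start) / max(max_steps - start, 1)
    return end_m - (end_m - base_m) * (math.cos(math.pi * t) + 1) / 2
```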
@@ -0,0 +1,10 @@
_base_ = "../convnext_t_mixups_bs100.py"

# model settings
model = dict(
alpha=2.0,
mix_mode="cutmix",
)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=200)
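This and the following short configs only override a few keys; everything else is inherited from `_base_ = "../convnext_t_mixups_bs100.py"` via mmcv-style recursive dict merging. A quick way to inspect the merged result (the file path here is illustrative):

```python
from mmcv import Config

cfg = Config.fromfile('configs/classification/cifar100/mixups/convnext/'
                      'convnext_t_cutmix_bs100.py')  # illustrative path
print(cfg.model.mix_mode, cfg.model.alpha)           # -> cutmix 2.0
```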
@@ -0,0 +1,10 @@
_base_ = "../convnext_t_mixups_bs100.py"

# model settings
model = dict(
alpha=[1, 0.8],
mix_mode=['cutmix', 'mixup'],
)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=200)
@@ -0,0 +1,10 @@
_base_ = "../convnext_t_mixups_bs100.py"

# model settings
model = dict(
alpha=1.0,
mix_mode="fmix",
)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=200)
@@ -0,0 +1,10 @@
_base_ = "../convnext_t_mixups_bs100.py"

# model settings
model = dict(
alpha=1.0,
mix_mode="gridmix",
)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=200)
@@ -0,0 +1,10 @@
_base_ = "../convnext_t_mixups_bs100.py"

# model settings
model = dict(
alpha=0.8,
mix_mode="mixup",
)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=200)
@@ -0,0 +1,10 @@
_base_ = "../convnext_t_mixups_bs100.py"

# model settings
model = dict(
alpha=2.0,
mix_mode="puzzlemix",
)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=200)
@@ -0,0 +1,13 @@
_base_ = "../convnext_t_mixups_bs100.py"

# model settings
model = dict(
alpha=1.0,
mix_mode="resizemix",
mix_args=dict(
resizemix=dict(scope=(0.1, 0.8), use_alpha=True),
),
)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=200)
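In the ResizeMix config above, `scope=(0.1, 0.8)` bounds the ratio by which the source image is shrunk before being pasted into the target. A hedged sketch of the operation (simplified: the `use_alpha` Beta-sampling option is replaced by a uniform draw, and names are illustrative):

```python
import random
import torch.nn.functional as F

def resize_mix(x_a, x_b, scope=(0.1, 0.8)):
    """Shrink x_b by a factor tau and paste it at a random spot in x_a."""
    h, w = x_a.shape[-2:]
    tau = random.uniform(*scope)
    ph, pw = max(int(h * tau), 1), max(int(w * tau), 1)
    patch = F.interpolate(x_b, size=(ph, pw), mode='bilinear', align_corners=False)
    out = x_a.clone()
    top, left = random.randrange(h - ph + 1), random.randrange(w - pw + 1)
    out[:, :, top:top + ph, left:left + pw] = patch
    lam = 1.0 - (ph * pw) / (h * w)   # label weight kept by the target image
    return out, lam
```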
@@ -0,0 +1,10 @@
_base_ = "../convnext_t_mixups_bs100.py"

# model settings
model = dict(
alpha=0.2,
mix_mode="saliencymix",
)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=200)