cn-clip_vit-base-p16_zeroshot-cls_cifar100.py
_base_ = '../_base_/default_runtime.py'
# data settings
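# The mean/std below are the standard CLIP pre-training normalization
# statistics, scaled from the [0, 1] range to the [0, 255] pixel range.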
data_preprocessor = dict(
    type='MultiModalDataPreprocessor',
    mean=[0.48145466 * 255, 0.4578275 * 255, 0.40821073 * 255],
    std=[0.26862954 * 255, 0.26130258 * 255, 0.27577711 * 255],
    to_rgb=False,
)
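# Test-time pipeline: resize to the 224x224 input resolution with bicubic
# interpolation, then pack the image (and any per-sample 'text' annotation)
# for the model.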
test_pipeline = [
    dict(type='Resize', scale=(224, 224), interpolation='bicubic'),
    dict(
        type='PackInputs',
        algorithm_keys=['text'],
        meta_keys=['image_id', 'scale_factor'],
    ),
]
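# Zero-shot evaluation only: there is no training dataloader, and the test
# set is the CIFAR-100 test split.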
train_dataloader = None
test_dataloader = dict(
    batch_size=32,
    num_workers=8,
    dataset=dict(
        type='CIFAR100',
        data_root='data/cifar100',
        split='test',
        pipeline=test_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=False),
)
test_evaluator = dict(type='Accuracy', topk=(1, ))
# schedule settings
train_cfg = None
val_cfg = None
test_cfg = dict()
# model settings
model = dict(
    type='ChineseCLIP',
    # ViT-B/16 image encoder (pre-norm, QuickGELU activation), matching the
    # Chinese-CLIP ViT-B/16 release.
    vision_backbone=dict(
        type='VisionTransformer',
        arch='base',
        img_size=224,
        patch_size=16,
        norm_cfg=dict(type='LN', eps=1e-5),
        final_norm=True,
        layer_cfgs=dict(act_cfg=dict(type='QuickGELU')),
        pre_norm=True,
        out_type='cls_token',
    ),
    # Chinese BERT text encoder: 12 layers, hidden size 768, and a
    # 21128-token Chinese vocabulary.
    text_backbone=dict(
        type='BertModelCN',
        config=dict(
            vocab_size=21128,
            pad_token_id=0,
            add_type_embeddings=True,
            attention_probs_dropout_prob=0.1,
            hidden_act='gelu',
            hidden_dropout_prob=0.1,
            hidden_size=768,
            initializer_range=0.02,
            intermediate_size=3072,
            max_position_embeddings=512,
            num_attention_heads=12,
            num_hidden_layers=12,
            type_vocab_size=2,
            layer_norm_eps=1e-12)),
    # BERT-style tokenizer using the Chinese-CLIP vocabulary file.
    tokenizer=dict(
        type='FullTokenizer',
        vocab_file=  # noqa
        'https://download.openmmlab.com/mmpretrain/v1.0/chinese_clip/vocab.txt'
    ),
    # Dimension of the shared image/text embedding space.
    proj_dim=512,
    # Build the zero-shot classifier from the predefined CIFAR-100
    # class-name prompts.
    text_prototype='cifar100',
)
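
# Usage sketch (not part of the original config; the checkpoint path is a
# placeholder): with MMPretrain installed, zero-shot CIFAR-100 accuracy can
# be evaluated through the standard test entry point, e.g.
#
#   python tools/test.py cn-clip_vit-base-p16_zeroshot-cls_cifar100.py \
#       /path/to/cn-clip_vit-base-p16_checkpoint.pth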