# File: configs/cswin/upernet_cswin_base.py
# (path taken from the diff header this text was extracted from; the
# leading "b/" diff prefix is dropped)
#
# Segmentation config: a 'CSWin' backbone with a decode head and an
# auxiliary head, 150 classes, AdamW with a polynomial LR schedule and
# linear warmup.
#
# NOTE(review): `_base_` and `_delete_` follow the mmcv Config inheritance
# convention (base files are merged first; `_delete_=True` replaces the
# inherited dict instead of merging into it) -- confirm against the mmcv
# version this repo pins.
_base_ = [
    '../_base_/models/upernet_cswin.py',
    '../_base_/datasets/ade20k.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_160k.py',
]

model = dict(
    backbone=dict(
        type='CSWin',
        embed_dim=96,
        # One entry per stage (four stages) in each of the lists below.
        depth=[2, 4, 32, 2],
        num_heads=[4, 8, 16, 32],
        split_size=[1, 2, 7, 7],
        drop_path_rate=0.6,
        # NOTE(review): presumably toggles activation checkpointing in the
        # backbone -- verify against the CSWin implementation.
        use_chk=False,
    ),
    decode_head=dict(
        # Equals embed_dim * (1, 2, 4, 8); presumably the per-stage output
        # widths of the backbone -- confirm against the CSWin implementation.
        in_channels=[96, 192, 384, 768],
        num_classes=150,
    ),
    auxiliary_head=dict(
        # Same value as the third entry of decode_head.in_channels.
        in_channels=384,
        num_classes=150,
    ),
)

# AdamW optimizer, no weight decay for position embedding & layer norm in backbone
optimizer = dict(
    _delete_=True,
    type='AdamW',
    lr=0.00006,
    betas=(0.9, 0.999),
    weight_decay=0.01,
    paramwise_cfg=dict(
        # decay_mult=0. scales weight decay to zero for parameters whose
        # names match these keys.
        custom_keys={
            'absolute_pos_embed': dict(decay_mult=0.),
            'relative_position_bias_table': dict(decay_mult=0.),
            'norm': dict(decay_mult=0.),
        },
    ),
)

# Polynomial decay (power=1.0, i.e. linear) down to min_lr, preceded by a
# linear warmup over the first 1500 iterations; scheduled per iteration
# rather than per epoch (by_epoch=False).
lr_config = dict(
    _delete_=True,
    policy='poly',
    warmup='linear',
    warmup_iters=1500,
    warmup_ratio=1e-6,
    power=1.0,
    min_lr=0.0,
    by_epoch=False,
)

# Per-GPU batch size override.
data = dict(samples_per_gpu=2)