# configs/dpt/dpt.yml
# Collection entry for DPT: training data, paper reference, README path,
# code location, and the upstream repository listed under "Converted From".
Collections:
- Name: dpt
  Metadata:
    Training Data:
    - ADE20K
  Paper:
    URL: https://arxiv.org/abs/2103.13413
    # Title follows the arXiv listing for 2103.13413 (plural "Transformers").
    Title: Vision Transformers for Dense Prediction
  README: configs/dpt/README.md
  Code:
    # The URL is pinned to the same release tag that Version records.
    URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dpt_head.py#L215
    Version: v0.17.0
  Converted From:
    Code: https://github.com/isl-org/DPT
# Model entries. Each entry names its parent collection via "In Collection"
# and lists metadata, evaluation results, the config path, and a weights URL.
Models:
- Name: dpt_vit-b16_512x512_160k_ade20k
  In Collection: dpt
  Metadata:
    backbone: ViT-B
    crop size: (512,512)
    # Presumably the schedule length in training iterations (the model name
    # carries "160k") - confirm against the referenced config.
    lr schd: 160000
    # One measurement per list item; the sibling keys record the setup
    # (hardware, backend, batch size, precision mode, input resolution).
    inference time (ms/im):
    - value: 96.06
      hardware: V100
      backend: PyTorch
      batch size: 1
      mode: FP32
      resolution: (512,512)
    Training Memory (GB): 8.09
  Results:
  - Task: Semantic Segmentation
    Dataset: ADE20K
    Metrics:
      mIoU: 46.97
      # Presumably multi-scale + flip test-time augmentation - confirm.
      mIoU(ms+flip): 48.34
  Config: configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py
  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dpt/dpt_vit-b16_512x512_160k_ade20k/dpt_vit-b16_512x512_160k_ade20k-db31cf52.pth