# tools/convert_datasets/hrf.py
# Copyright (c) OpenMMLab. All rights reserved.
2
import argparse
3
import os
4
import os.path as osp
5
import tempfile
6
import zipfile
7
8
import mmcv
9
10
# Number of entries each HRF zip archive must contain once extracted; the
# conversion aborts if an archive yields a different count.
HRF_LEN = 15
# Per archive, the first TRAINING_LEN files (in sorted filename order) are
# written to the training split; the remaining files go to validation.
TRAINING_LEN = 5
12
13
14
def parse_args(argv=None):
    """Parse the command-line arguments of the HRF conversion script.

    Args:
        argv (list[str] | None): Argument strings to parse. When ``None``
            (the default), argparse reads them from ``sys.argv[1:]``.

    Returns:
        argparse.Namespace: Namespace with the six required zip paths
            (``healthy_path``, ``healthy_manualsegm_path``,
            ``glaucoma_path``, ``glaucoma_manualsegm_path``,
            ``diabetic_retinopathy_path``,
            ``diabetic_retinopathy_manualsegm_path``) and the optional
            ``tmp_dir`` and ``out_dir`` (both ``None`` when not given).
    """
    parser = argparse.ArgumentParser(
        description='Convert HRF dataset to mmsegmentation format')
    parser.add_argument('healthy_path', help='the path of healthy.zip')
    parser.add_argument(
        'healthy_manualsegm_path', help='the path of healthy_manualsegm.zip')
    parser.add_argument('glaucoma_path', help='the path of glaucoma.zip')
    parser.add_argument(
        'glaucoma_manualsegm_path', help='the path of glaucoma_manualsegm.zip')
    parser.add_argument(
        'diabetic_retinopathy_path',
        help='the path of diabetic_retinopathy.zip')
    parser.add_argument(
        'diabetic_retinopathy_manualsegm_path',
        help='the path of diabetic_retinopathy_manualsegm.zip')
    parser.add_argument('--tmp_dir', help='path of the temporary directory')
    parser.add_argument('-o', '--out_dir', help='output path')
    args = parser.parse_args(argv)
    return args
33
34
35
def _binarize_annotation(img):
    """Reduce a loaded annotation image to a single-channel 0/1 mask.

    The annotation img should be divided by 128, because some of the
    annotation imgs are not standard. We should set a threshold to convert
    the nonstandard annotation imgs. The value divided by 128 is equivalent
    to '1 if value >= 128 else 0'.

    Only the first channel of ``img`` is used.
    """
    return img[:, :, 0] // 128


def _convert_archive(zip_path, tmp_root, out_dir, kind, transform=None):
    """Extract one HRF zip archive and write its files as PNGs.

    Files are processed in sorted filename order; the first ``TRAINING_LEN``
    go to ``<out_dir>/<kind>/training`` and the rest to
    ``<out_dir>/<kind>/validation``.

    Args:
        zip_path (str): Path of the zip archive to convert.
        tmp_root (str | None): Directory in which the temporary extraction
            directory is created (``None`` uses the tempfile default).
        out_dir (str): Root output directory.
        kind (str): Output sub-directory, ``'images'`` or ``'annotations'``.
        transform (callable | None): Optional function applied to each
            loaded image before it is written.

    Raises:
        AssertionError: If the archive does not extract to exactly
            ``HRF_LEN`` top-level entries.
    """
    with tempfile.TemporaryDirectory(dir=tmp_root) as tmp_dir:
        # Close the archive handle as soon as extraction is finished.
        with zipfile.ZipFile(zip_path) as zip_file:
            zip_file.extractall(tmp_dir)

        # List and sort once; both splits are slices of the same ordering.
        filenames = sorted(os.listdir(tmp_dir))
        # Raised explicitly (rather than via ``assert``) so the check still
        # runs under ``python -O``; the exception type is kept unchanged.
        if len(filenames) != HRF_LEN:
            raise AssertionError(
                'len(os.listdir(tmp_dir)) != {}'.format(HRF_LEN))

        for index, filename in enumerate(filenames):
            split = 'training' if index < TRAINING_LEN else 'validation'
            img = mmcv.imread(osp.join(tmp_dir, filename))
            if transform is not None:
                img = transform(img)
            mmcv.imwrite(
                img,
                osp.join(out_dir, kind, split,
                         osp.splitext(filename)[0] + '.png'))


def main():
    """Convert the HRF zip archives into the mmsegmentation layout.

    Reads the archive paths from the command line, creates
    ``images/{training,validation}`` and ``annotations/{training,validation}``
    under the output directory (``data/HRF`` when ``--out_dir`` is not
    given), then writes every image and binarized annotation as a PNG.
    """
    args = parse_args()
    images_path = [
        args.healthy_path, args.glaucoma_path, args.diabetic_retinopathy_path
    ]
    annotations_path = [
        args.healthy_manualsegm_path, args.glaucoma_manualsegm_path,
        args.diabetic_retinopathy_manualsegm_path
    ]
    if args.out_dir is None:
        out_dir = osp.join('data', 'HRF')
    else:
        out_dir = args.out_dir

    print('Making directories...')
    mmcv.mkdir_or_exist(out_dir)
    for kind in ('images', 'annotations'):
        mmcv.mkdir_or_exist(osp.join(out_dir, kind))
        for split in ('training', 'validation'):
            mmcv.mkdir_or_exist(osp.join(out_dir, kind, split))

    print('Generating images...')
    for now_path in images_path:
        _convert_archive(now_path, args.tmp_dir, out_dir, 'images')

    print('Generating annotations...')
    for now_path in annotations_path:
        _convert_archive(
            now_path,
            args.tmp_dir,
            out_dir,
            'annotations',
            transform=_binarize_annotation)

    print('Done!')
108
109
110
# Run the conversion only when this file is executed as a script.
if __name__ == '__main__':
    main()