a b/mediaug/download.py
1
import os
2
from os.path import join
3
from pathlib import Path
4
import urllib.request
5
import zipfile
6
7
8
def download_sipakmed(data_dest=None):
    """ Download the SIPaKMeD dataset, http://cs.uoi.gr/~marina

    One archive per cell type is fetched into ``data_dest``. Every archive
    that the standard-library ``zipfile`` module can read is unpacked into
    ``data_dest`` and then deleted. Archives in any other format (the
    upstream URLs end in ``.7z``, which ``zipfile`` cannot open) are left on
    disk so they can be extracted manually.

    Args:
      data_dest (str): The path of dir to store data. If None will default
        to the ``sipakmed_raw`` folder inside the package cache.
    Returns:
      None
    Raises:
      ValueError: If ``data_dest`` already exists.
    """
    if data_dest is None:
        data_dest = join(get_data_cache(), 'sipakmed_raw')
    if os.path.exists(data_dest):
        raise ValueError('Data already downloaded.')
    # makedirs (not mkdir) so a missing parent directory is not an error.
    os.makedirs(data_dest)

    # Cell type and URL are kept together as pairs so a missing comma can
    # never silently shift one list against the other.
    sources = (
        ('metaplastic',
         'http://cs.uoi.gr/~marina/SIPAKMED/im_Metaplastic.7z'),
        ('dyskeratotic',
         'http://cs.uoi.gr/~marina/SIPAKMED/im_Dyskeratotic.7z'),
        ('koilocytotic',
         'http://cs.uoi.gr/~marina/SIPAKMED/im_Koilocytotic.7z'),
        ('parabasal',
         'http://cs.uoi.gr/~marina/SIPAKMED/im_Parabasal.7z'),
        ('superficial-Intermediate',
         'http://cs.uoi.gr/~marina/SIPAKMED/im_Superficial-Intermediate.7z'),
    )

    print(f'Downloading SIPaKMed to: {data_dest}')
    archive_paths = []
    for cell_type, url in sources:
        print(f'Downloading {cell_type}...')
        # Reuse the extension from the URL so the saved file name reflects
        # the real archive format instead of always claiming ".zip".
        extension = os.path.splitext(url)[1]
        archive_path = join(data_dest, f'{cell_type}{extension}')
        urllib.request.urlretrieve(url, archive_path)
        archive_paths.append(archive_path)

    for archive_path in archive_paths:
        file_name = os.path.basename(archive_path)
        if not zipfile.is_zipfile(archive_path):
            # Keep the download rather than crashing or deleting data that
            # the standard library has no way to unpack.
            print(f'Skipping {file_name}: not a zip archive, extract it manually.')
            continue
        print(f'Extracting {file_name}...')
        with zipfile.ZipFile(archive_path) as f:
            f.extractall(data_dest)
        os.remove(archive_path)

    print('Finished downloading.')
49
    
50
51
def download_smear(data_dest=None):
    """ Placeholder for downloading the Herlev smear dataset.

    Automatic download is not available: every call raises.

    Args:
      data_dest (str): The path of dir to store data. Currently unused.
    Returns:
      None
    Raises:
      NotImplementedError: Always.
    """
    message = 'Get it yourself, not implemented.'
    raise NotImplementedError(message)
59
60
61
def get_data_cache():
    """ Return the dataset cache directory, creating it when absent.

    The cache is the ``.mediaug`` folder inside the current user's home
    directory.

    Args:
      None
    Returns:
      path (str): The path to the data cache on the system
    """
    cache_dir = Path.home() / '.mediaug'
    # exist_ok keeps repeated calls harmless once the folder is there.
    cache_dir.mkdir(exist_ok=True)
    return os.fspath(cache_dir)