a b/openomics/imageomics.py
1
import os
2
3
import h5py
4
# import large_image
5
import numpy as np
6
from dask import delayed
7
8
9
# import histomicstk as htk
10
# import histomicstk.segmentation.positive_pixel_count as ppc
11
12
13
class WholeSlideImage:
    """A folder of whole-slide images (``.svs`` files) belonging to one cohort.

    Preprocessing state is kept in the HDF5 file
    ``<folder_path>/models/wsi_preprocessed.hdf5``.
    """

    def __init__(self, cohort_name, folder_path, force_preprocess=False):
        """Open (or create) the cohort's HDF5 file and preprocess if needed.

        Args:
            cohort_name: Label of the cohort; stored as ``self.cancer_type``.
            folder_path: Existing directory that contains the ``.svs`` slides.
            force_preprocess: If True, run the preprocessing again even when
                the HDF5 file already holds a ``wsi_preprocessed`` dataset.

        Raises:
            NotADirectoryError: If ``folder_path`` is not an existing
                directory.
        """
        self.cancer_type = cohort_name
        # os.path.isdir() is already False for a path that does not exist,
        # so no separate os.path.exists() check is required.
        if not os.path.isdir(folder_path):
            raise NotADirectoryError(folder_path)

        # h5py does not create missing parent directories.
        models_dir = os.path.join(folder_path, "models")
        os.makedirs(models_dir, exist_ok=True)
        fname = os.path.join(models_dir, "wsi_preprocessed.hdf5")

        # Mode "a" opens an existing file read/write and creates it when it
        # is missing. Mode "w" would truncate the file on every start, which
        # made the "already preprocessed" branch below unreachable.
        with h5py.File(fname, "a") as f:
            if "wsi_preprocessed" not in f or force_preprocess:
                print("Preprocessing new WSI's")
                self.run_preprocess(f, folder_path)
            else:
                print("Already has wsi_preprocessed. Loading data from hdf5 file")

    @classmethod
    def name(cls):
        """Return the class name."""
        return cls.__name__

    def run_preprocess(self, f, folder_path, max_slides=2):
        """Create the ``wsi_preprocessed`` dataset and preprocess the slides.

        Args:
            f: Open, writable HDF5 file (or group) to store results in.
            folder_path: Directory that contains the ``.svs`` slides.
            max_slides: Maximum number of slides to preprocess; ``None``
                processes every slide. Defaults to 2, the limit that was
                previously hard-coded.

        Raises:
            FileNotFoundError: If the folder holds no ``.svs`` file.
        """
        from itertools import islice

        # A forced re-run finds the dataset already present; create_dataset
        # refuses to overwrite, so remove the stale one first.
        if "wsi_preprocessed" in f:
            del f["wsi_preprocessed"]
        f.create_dataset("wsi_preprocessed", (100,), dtype='i')

        # islice stops cleanly when the folder has fewer slides than the
        # limit, instead of leaking StopIteration from a manual next() call.
        slide_names = islice(self.wsi_file_iterator(folder_path), max_slides)
        for slide_name in slide_names:
            self.preprocess_wsi(f, os.path.join(folder_path, slide_name))

    def preprocess_wsi(self, f, imagePath):
        """Print the slide path and the result of ``slide_to_tile`` for it.

        Args:
            f: Open HDF5 file; currently unused.
            imagePath: Path of the slide to process.
        """
        print(imagePath)
        print(slide_to_tile(imagePath))

    def wsi_file_iterator(self, folder_path):
        """Yield the names of the ``.svs`` files found in ``folder_path``.

        Names are yielded in sorted order so that repeated runs visit the
        slides in the same sequence.

        Args:
            folder_path: Directory to scan (not recursive).

        Yields:
            File names (not full paths) ending in ``.svs``.

        Raises:
            FileNotFoundError: After the scan, if no ``.svs`` file was found.
        """
        has_any_wsi = False
        for file in sorted(os.listdir(folder_path)):
            if file.endswith(".svs"):
                has_any_wsi = True
                yield file

        if not has_any_wsi:
            raise FileNotFoundError(
                "Folder " + folder_path + " doesn't contain any WSI .svs files")
77
78
79
def slide_to_tile(slide_path, params=None, region=None,
                  tile_grouping=256):
    """Process a slide tile by tile, in parallel batches of tiles.

    The slide is opened with large_image and its metadata is printed. The
    tile positions are split into groups of ``tile_grouping``; each group is
    handed to ``_count_tiles`` as a dask delayed task and the per-group
    results are merged by ``_combine``.

    Args:
        slide_path (string (path)): Path to the slide to analyze.
        params: Passed through unchanged to ``_count_tiles``.
        region (dict, optional): A valid region dict (per a large_image
            TileSource.tileIterator's region argument). When given, only
            that region is tiled.
        tile_grouping (int): The number of tiles to process as part of a
            single task.

    Returns:
        The computed result of ``_combine`` over all per-group results of
        ``_count_tiles``.

    Notes:
        Dask is used as configured to compute the result.
    """
    # Imported lazily: the module-level import is commented out, so without
    # this the name is unbound and the call fails with NameError.
    import large_image

    ts = large_image.getTileSource(slide_path)
    print(ts.getMetadata())
    kwargs = dict(format=large_image.tilesource.TILE_FORMAT_NUMPY)
    if region is not None:
        kwargs['region'] = region

    # The computation must run whether or not a region was given; it used to
    # sit in the ``else`` of the region check, so passing a region left the
    # result unassigned and the return raised UnboundLocalError.
    total_tiles = ts.getSingleTile(**kwargs)['iterator_range']['position']
    tasks = [
        delayed(_count_tiles)(
            slide_path, params, kwargs, position,
            min(tile_grouping, total_tiles - position))
        for position in range(0, total_tiles, tile_grouping)
    ]
    return delayed(_combine)(tasks).compute()
118
119
120
def _count_tiles(slide_path, params, kwargs, position, count):
    """Sum the first two shape dimensions of a run of consecutive tiles.

    Args:
        slide_path: Path to the slide; the tile source is re-opened here
            rather than passed in.
        params: Unused; kept so the call signature stays unchanged.
        kwargs: Keyword arguments forwarded to ``getSingleTile``.
        position: Index of the first tile to read.
        count: Number of consecutive tiles to read, starting at ``position``.

    Returns:
        A length-2 numpy array holding the element-wise sum of
        ``tile.shape[0:2]`` over the tiles read (presumably rows and
        columns -- confirm against the tile format in use).
    """
    # Imported lazily: the module-level import is commented out, so without
    # this the name is unbound and the call fails with NameError.
    import large_image

    ts = large_image.getTileSource(slide_path)

    subtotal = np.array((0, 0))
    for pos in range(position, position + count):
        tile = ts.getSingleTile(tile_position=pos, **kwargs)['tile']
        subtotal = subtotal + np.array(tile.shape[0:2])

    return subtotal
137
138
139
def _combine(results):
    """Add up the per-group results element-wise.

    Args:
        results: Sequence of equally shaped arrays (one per tile group).

    Returns:
        The sum of ``results`` taken along the first axis.
    """
    return np.sum(results, axis=0)
146
147
148
149
if __name__ == '__main__':
    # Ad-hoc run against a hard-coded local slide folder, always forcing a
    # fresh preprocessing pass.
    wsi = WholeSlideImage(
        cohort_name="LUAD",
        folder_path="/media/jonny_admin/540GB/Research/TCGA_LUAD-WSI/",
        force_preprocess=True,
    )