Diff of /processing_pytorch.py [000000] .. [53bd2d]

Switch to unified view

a b/processing_pytorch.py
1
# -*- coding: utf-8 -*-
2
"""
3
Created on Wed Sep  8 10:27:47 2021
4
5
@author: m.beuque
6
"""
7
from matplotlib.image import imread
8
import cv2
9
import tqdm
10
import pandas as pd
11
import os
12
import numpy as np
13
from torch.utils import data
14
import torchvision.transforms as transforms
15
from sklearn.utils import shuffle
16
import torch
17
18
19
def generate_dataset_tissue_type(main_path,path_data,SEED):
    """Load the stroma / epithelial-tissue tiles listed in a csv and shuffle them.

    Parameters
    ----------
    main_path : path of the folder containing the "Slides" directory; tiles
        are read from ``<main_path>/Slides/<slide>/tiles/<image_name>``.
    path_data : path to a comma-separated csv with at least the columns
        ``labels``, ``dataset_name`` and ``image_name``.
    SEED : random state forwarded to ``sklearn.utils.shuffle``.

    Returns
    -------
    X : numpy array stacking the loaded tiles (tiles whose shape is not
        (96, 96, 3) are resized to 96x96 with cubic interpolation first).
    y : list of integer labels, 0 for 'stroma' and 1 for 'epithelial tissue'.
    paths : list of the tile file paths.

    X, y and paths are shuffled with the same permutation, so index ``i``
    refers to the same tile in all three. Tiles listed in the csv but missing
    on disk are skipped silently.
    """
    table = pd.read_csv(path_data,sep = ',' )

    X = []
    y = []
    paths = []
    slides_dir = os.path.join(main_path, 'Slides')
    # csv label -> integer class; order matters (stroma tiles are appended
    # before epithelial tiles within each slide).
    classes = (('stroma', 0), ('epithelial tissue', 1))
    for slide in tqdm.tqdm(os.listdir(slides_dir)):
        tile_path = os.path.join(slides_dir, slide, 'tiles')
        in_slide = table['dataset_name'] == slide
        for label, target in classes:
            image_names = list(table[(table['labels'] == label) & in_slide]['image_name'])
            for image_name in image_names:
                image_file = os.path.join(tile_path, image_name)
                if os.path.isfile(image_file):
                    X.append(imread(image_file))
                    y.append(target)
                    paths.append(image_file)

    # Resize BEFORE stacking: stacking tiles of different shapes yields a
    # ragged/object array (or an error), whereas uniform tiles stack cleanly.
    X = [
        tile if tile.shape == (96,96,3)
        else cv2.resize(tile,(96,96),interpolation = cv2.INTER_CUBIC)
        for tile in X
    ]
    X = np.array(X)
    # Shuffle paths together with X and y so the three stay aligned.
    X,y,paths = shuffle(X,y,paths,random_state=SEED)

    return X, y, paths
49
50
def generate_dataset_grade(main_path,path_data):
    """Load the graded tiles listed in a csv, in slide order (no shuffling).

    Parameters
    ----------
    main_path : folder containing the "Slides" directory where the H&E tiles
        are stored (``<main_path>/Slides/<slide>/tiles/<image_name>``).
    path_data : path to the csv describing the training, testing or
        validation dataset; the columns ``labels``, ``dataset_name`` and
        ``image_name`` are read.

    Returns
    -------
    X : numpy array of the loaded tiles; tiles whose shape is not
        (96, 96, 3) are resized to 96x96 with cubic interpolation.
    y : numpy array of the string labels 'non-dysplasia', 'low grade' or
        'high grade', aligned with X.
    paths : list of the tile file paths, aligned with X.

    A tile listed in the csv but absent on disk is skipped and a short
    message is printed.
    """
    table = pd.read_csv(path_data,sep = ',' )
    # (csv label, message printed when a listed tile is missing), in the
    # order the tiles of each slide are collected.
    grades = (
        ("non-dysplasia", "error for non-dysplasia"),
        ("low grade", "error for low grade"),
        ("high grade", "error for high grade"),
    )
    slides_root = os.path.join(main_path, 'Slides')

    tiles = []
    labels = []
    paths = []
    for slide in tqdm.tqdm(os.listdir(slides_root)):
        tile_path = os.path.join(slides_root, slide, 'tiles')
        # Select the image names of every grade for this slide up front.
        per_grade = [
            list(table[(table['labels']==grade) & (table['dataset_name']==slide)]['image_name'])
            for grade, _ in grades
        ]
        for (grade, missing_message), image_names in zip(grades, per_grade):
            for image_name in image_names:
                candidate = os.path.join(tile_path, image_name)
                if not os.path.isfile(candidate):
                    print(missing_message)
                    continue
                tiles.append(imread(candidate))
                labels.append(grade)
                paths.append(candidate)

    # rescale the images to the same size
    tiles = [
        tile if tile.shape == (96,96,3)
        else cv2.resize(tile,(96,96),interpolation = cv2.INTER_CUBIC)
        for tile in tiles
    ]
    X = np.array(tiles)
    y = np.array(labels)
    return X,y,paths
95
96
#regular dataset generation
97
class CancerDataset(data.Dataset):
    """In-memory dataset pairing images with labels for PyTorch.

    ``X_all`` and ``y_all`` are indexable collections of equal length; item
    ``i`` of the dataset is ``(transform(X_all[i]), y_all[i])``.

    ``transform`` may follow either calling convention:

    * a torchvision transform (the default), called positionally as
      ``transform(image)`` and returning the transformed image;
    * a transform called with a keyword, ``transform(image=image)``, that
      returns a mapping whose ``'image'`` entry is the result (the
      convention originally hard-coded in ``__getitem__``).

    The default converts to a tensor first and then center-crops to 64x64,
    because torchvision's ``CenterCrop`` does not accept NumPy arrays while
    ``ToTensor`` does.
    """
    def __init__(self,X_all, y_all, transform = transforms.Compose([transforms.ToTensor(),transforms.CenterCrop(64)])):
        'Initialization'
        self.labels = y_all
        self.list_IDs = X_all
        self.transform = transform
        # One string identifier ("0", "1", ...) per sample.
        self.image_files_list = [str(s) for s in range(len(self.list_IDs))]

    def __len__(self):
        'Denotes the total number of samples'
        return len(self.list_IDs)

    def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        X = self.list_IDs[index]
        # torchvision transforms take the image positionally and return it
        # directly; calling them with image=... raises TypeError. Checked at
        # call time so reassigning self.transform keeps working.
        if type(self.transform).__module__.startswith('torchvision'):
            X = self.transform(X)
        else:
            X = self.transform(image=X)
            X = X['image']
        # Load data and get label
        y = self.labels[index]
        return X, y
119
    
120
def df_dl_features(X,paths,data_transforms,classifier):
    """Run ``classifier`` on every image and collect the outputs in a DataFrame.

    Parameters
    ----------
    X : iterable of images; each is passed to ``data_transforms(image=...)``.
    paths : sequence indexable by position; ``paths[i]`` labels the row
        produced from the i-th image of ``X``.
    data_transforms : callable invoked as ``data_transforms(image=img)`` and
        returning a mapping whose ``'image'`` entry is a tensor.
    classifier : callable applied to the transformed image after a leading
        batch dimension of size 1 has been added.

    Returns
    -------
    pandas.DataFrame with one row per path (index) and one column per
    element of the flattened classifier output.
    """
    features = {}
    # Inference only: no autograd graph is needed for feature extraction.
    with torch.no_grad():
        # Wrap X (not enumerate) so tqdm can report a total when X has a length.
        for i,temp_X in enumerate(tqdm.tqdm(X)):
            tensor_X=data_transforms(image=temp_X)["image"]
            # Add the batch dimension without mutating the transform's output.
            tensor_X=tensor_X.unsqueeze(0)
            # .cpu() is a no-op on CPU tensors and makes .numpy() valid when
            # the classifier output lives on another device.
            output=torch.flatten(classifier(tensor_X)).detach().cpu().numpy()
            features[paths[i]]=output.flatten()
    # Keys become columns; transpose so that each path is a row.
    features=pd.DataFrame.from_dict(features)
    features=features.T
    return features