Diff of /utils.py [000000] .. [7b3b0e]

Switch to unified view

a b/utils.py
1
'''
2
Contains the utility functions
3
'''
4
5
import numpy as np
6
import config as config
7
8
import sys
9
sys.path.append('../')
10
11
from sksurv.linear_model import CoxnetSurvivalAnalysis
12
from loader import Dataset
13
14
# Names of the model variants known to this module.
# NOTE(review): the '_gp' / '_gd' suffixes presumably denote the pair of
# feature sets being fused -- confirm against the training scripts.
available_models = [
    'genomics',
    'pyradiomics',
    'densenet',
    'intermediate_gp',
    'intermediate_gd',
    'late_gp',
    'late_gd',
]
18
19
def run_coxnet(l1_ratio, n_alphas, x_train, y_train, x_test, y_test):
    '''
    Fit a CoxnetSurvivalAnalysis model and evaluate it on a second split.

    The estimator is created with the given l1_ratio and n_alphas, fitted
    on (x_train, y_train), and then asked to predict and to score on
    (x_test, y_test).

    Returns the tuple (outputs, score): the value of predict(x_test) and
    the value of score(x_test, y_test).
    '''
    model = CoxnetSurvivalAnalysis(l1_ratio=l1_ratio, n_alphas=n_alphas)
    model.fit(x_train, y_train)
    # Tuple elements are evaluated left to right: predict first, then score.
    return model.predict(x_test), model.score(x_test, y_test)
26
27
def get_data(split=0, location=config.csv_location, mode='valid'):
    '''
    Load the identifiers and the two integer columns of one data split.

    Two tab-separated files are read:
        location + 'train_<split>.csv'
        location + '<mode>_<split>.csv'
    Every non-blank row must hold exactly three tab-separated fields: an
    identifier followed by two integers.

    split: index that appears in the file names.
    location: prefix prepended verbatim to the file names, so a directory
        has to end with its path separator.
    mode: prefix of the second file; 'valid' by default,
        use mode = 'test' for testing.

    Returns X_train, X_test, y_train, y_test, y_train2, y_test2. The X
    lists hold the first column as stripped strings; the y lists hold the
    second column and the y*2 lists the third column, both as int.

    Raises ValueError when a non-blank row does not have three fields or
    when one of the last two fields is not an integer.
    '''

    print('Loading data for mode ' + mode + ' from location ' + location)
    X_train, y_train, y_train2 = _read_split_file(
        location + 'train_' + str(split) + '.csv')
    X_test, y_test, y_test2 = _read_split_file(
        location + mode + '_' + str(split) + '.csv')

    return X_train, X_test, y_train, y_test, y_train2, y_test2


def _read_split_file(path):
    '''
    Parse one tab-separated split file into three parallel lists:
    (first column as str, second column as int, third column as int).
    '''
    names, second_col, third_col = [], [], []
    with open(path, 'r') as curr_file:
        for row in curr_file:
            if not row.strip():
                # A stray blank line (e.g. at the end of the file) carries
                # no sample; skip it instead of failing on the unpack below.
                continue
            name, second, third = row.split('\t')
            names.append(name.strip())
            second_col.append(int(second.strip()))
            third_col.append(int(third.strip()))
    return names, second_col, third_col
50
51
def get_structured_array(data_bool, data_value):
    '''
    Pack two parallel sequences into one numpy structured array.

    Record i of the result is the pair (data_bool[i], data_value[i]),
    stored with dtype 'bool, i8'. The number of records is len(data_bool).
    Both inputs are indexed as given; no conversion is applied to them
    before the records are built.
    '''
    records = [(data_bool[pos], data_value[pos])
               for pos in range(len(data_bool))]
    return np.array(records, dtype='bool, i8')
61
62
63
class DataLoader(object):
    '''
    Loads the labels and the genomics / pyradiomics / densenet features of
    one fold and stores them as attributes.

    Attributes set by load_data:
        train_num, valid_num, test_num: number of samples per split.
        y_{train,valid,test}_value: second column of the split files.
        y_{train,valid,test}_bool: third column of the split files.
        gen_*, pyrad_*, dense_*: per-split features, each feature set
            divided by its maximum over the train and valid splits.

    mode 'cpu' stores numpy arrays; mode 'gpu' stores float torch tensors
    (nothing in this class moves them to a CUDA device).
    '''

    _MODES = ('cpu', 'gpu')

    def __init__(self, fold=0, num_genes=500, mode='cpu'):
        '''
        fold: split index passed to get_data.
        num_genes: number of highest-variance genomics columns to keep.
        mode: 'cpu' or 'gpu', see the class docstring.
        '''
        self.fold = fold
        self.num_genes = num_genes
        self.load_data(mode)

    def load_data(self, mode):
        '''
        Read every split of the current fold and populate the attributes
        listed in the class docstring.

        Raises NotImplementedError when mode is neither 'cpu' nor 'gpu'.
        '''
        # Fail fast: reject an unknown mode before any file is read.
        if mode not in self._MODES:
            raise NotImplementedError(
                "mode must be 'cpu' or 'gpu', got " + repr(mode))

        train_ids, valid_ids, y_value_train, y_value_valid, \
            y_train, y_valid = get_data(self.fold, config.csv_location, 'valid')
        _, test_ids, _, y_value_test, _, y_test = \
            get_data(self.fold, config.csv_location, 'test')

        self.train_num = len(train_ids)
        self.valid_num = len(valid_ids)
        self.test_num = len(test_ids)

        # labels
        self.y_train_bool = self._to_mode(np.array(y_train), mode)
        self.y_valid_bool = self._to_mode(np.array(y_valid), mode)
        self.y_test_bool = self._to_mode(np.array(y_test), mode)
        self.y_train_value = self._to_mode(np.array(y_value_train), mode)
        self.y_valid_value = self._to_mode(np.array(y_value_valid), mode)
        self.y_test_value = self._to_mode(np.array(y_value_test), mode)

        NRG = Dataset(config)

        # genomics (get_genomics returns a pair; only the first item is used)
        gen_train, _ = NRG.get_genomics(train_ids)
        gen_valid, _ = NRG.get_genomics(valid_ids)
        gen_test, _ = NRG.get_genomics(test_ids)
        gen_train, gen_valid, gen_test = self._keep_top_variance(
            gen_train, gen_valid, gen_test, self.num_genes)
        self.gen_train, self.gen_valid, self.gen_test = self._finalize(
            gen_train, gen_valid, gen_test, mode)

        # pyradiomics
        pyrad_train = NRG.get_pyradiomics(train_ids)
        pyrad_valid = NRG.get_pyradiomics(valid_ids)
        pyrad_test = NRG.get_pyradiomics(test_ids)
        self.pyrad_train, self.pyrad_valid, self.pyrad_test = self._finalize(
            pyrad_train, pyrad_valid, pyrad_test, mode)

        # densenet
        dense_train = NRG.get_densenet_features(train_ids)
        dense_valid = NRG.get_densenet_features(valid_ids)
        dense_test = NRG.get_densenet_features(test_ids)
        self.dense_train, self.dense_valid, self.dense_test = self._finalize(
            dense_train, dense_valid, dense_test, mode)

    @staticmethod
    def _to_mode(array, mode):
        '''Return array unchanged for 'cpu', or as a float tensor for 'gpu'.'''
        if mode != 'gpu':
            return array
        # torch is imported lazily so that 'cpu' mode works without it; the
        # module itself is needed here, not only Variable.
        import torch
        from torch.autograd import Variable
        return Variable(torch.from_numpy(array)).float()

    @staticmethod
    def _keep_top_variance(train, valid, test, num_columns):
        '''
        Keep, in all three splits, the num_columns columns whose standard
        deviation on the train split is largest.
        '''
        train = np.array(train)
        order = np.argsort(np.std(train, axis=0))
        # argsort is ascending, so the highest-variance columns are last.
        keep = order[-num_columns:]
        return (train[:, keep],
                np.array(valid)[:, keep],
                np.array(test)[:, keep])

    @classmethod
    def _finalize(cls, train, valid, test, mode):
        '''Scale the three splits by a common maximum, then convert per mode.'''
        # NOTE(review): the maximum is taken over train + valid only. The
        # original code concatenated (train, valid, train), which yields the
        # same value; the test split may have been intended -- confirm
        # before changing, as it would alter the scaling of every feature.
        scale = np.max(np.concatenate((train, valid), axis=0))
        return (cls._to_mode(train / scale, mode),
                cls._to_mode(valid / scale, mode),
                cls._to_mode(test / scale, mode))
174
175
176