|
a |
|
b/params/basic_params.py |
|
|
1 |
import time |
|
|
2 |
import argparse |
|
|
3 |
import torch |
|
|
4 |
import os |
|
|
5 |
import models |
|
|
6 |
from util import util |
|
|
7 |
|
|
|
8 |
|
|
|
9 |
class BasicParams:
    """
    This class defines the console parameters shared by the project.

    Typical use is ``param = BasicParams().parse()``: the common arguments are
    registered, the model-specific ones are added by the setter returned from
    ``models.get_param_setter``, and a few derived attributes are attached to
    the parsed namespace.
    """

    # Maps the value of --model to the downstream task stored in the parameters.
    _DOWNSTREAM_TASKS = {
        'vae_classifier': 'classification',
        'vae_regression': 'regression',
        'vae_survival': 'survival',
        'vae_multitask': 'multitask',
        'vae_multitask_gn': 'multitask',
        'vae_alltask': 'alltask',
        'vae_alltask_gn': 'alltask',
    }

    # Maps the value of --net_VAE to the pair (add_channel, ch_separate).
    _VAE_LAYOUTS = {
        'conv_1d': (True, False),
        'fc_sep': (False, True),
        'fc': (False, False),
    }

    def __init__(self):
        """
        Reset the class. Indicates the class hasn't been initialized
        """
        self.initialized = False
        self.isTrain = True
        self.isTest = True
        # Filled by get_params() with the fully configured argument parser.
        self.parser = None

    def initialize(self, parser):
        """
        Define the common console parameters

        Parameters:
            parser -- the argparse parser the arguments are registered on

        Returns the same parser and marks this object as initialized.
        """
        parser.add_argument('--gpu_ids', type=str, default='0',
                            help='which GPU would like to use: e.g. 0 or 0,1, -1 for CPU')
        parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints',
                            help='models, settings and intermediate results are saved in folder in this directory')
        parser.add_argument('--experiment_name', type=str, default='test',
                            help='name of the folder in the checkpoint directory')

        # Dataset parameters
        parser.add_argument('--omics_mode', type=str, default='a',
                            help='omics types would like to use in the model, options: [abc | ab | a | b | c]')
        parser.add_argument('--data_root', type=str, default='./data',
                            help='path to input data')
        parser.add_argument('--batch_size', type=int, default=32,
                            help='input data batch size')
        parser.add_argument('--num_threads', default=0, type=int,
                            help='number of threads for loading data')
        parser.add_argument('--set_pin_memory', action='store_true',
                            help='set pin_memory in the dataloader to increase data loading performance')
        parser.add_argument('--not_stratified', action='store_true',
                            help='do not apply the stratified mode in train/test split if set true')
        parser.add_argument('--use_sample_list', action='store_true',
                            help='provide a subset sample list of the dataset, store in the path data_root/sample_list.tsv, if False use all the samples')
        parser.add_argument('--use_feature_lists', action='store_true',
                            help='provide feature lists of the input omics data, e.g. data_root/feature_list_A.tsv, if False use all the features')
        parser.add_argument('--detect_na', action='store_true',
                            help='detect missing value markers during data loading, stay False can improve the loading performance')
        parser.add_argument('--file_format', type=str, default='tsv',
                            help='file format of the omics data, options: [tsv | csv | hdf]')

        # Model parameters
        parser.add_argument('--model', type=str, default='vae_classifier',
                            help='chooses which model want to use, options: [vae_classifier | vae_regression | vae_survival | vae_multitask]')
        # The help text no longer names a default backbone: it used to claim
        # conv_1d while the actual default is fc_sep.
        parser.add_argument('--net_VAE', type=str, default='fc_sep',
                            help='specify the backbone of the VAE, options: [conv_1d | fc_sep | fc]')
        parser.add_argument('--net_down', type=str, default='multi_FC_classifier',
                            help='specify the backbone of the downstream task network, default is the multi-layer FC classifier, options: [multi_FC_classifier | multi_FC_regression | multi_FC_survival | multi_FC_multitask]')
        parser.add_argument('--norm_type', type=str, default='batch',
                            help='the type of normalization applied to the model, default to use batch normalization, options: [batch | instance | none ]')
        parser.add_argument('--filter_num', type=int, default=8,
                            help='number of filters in the last convolution layer in the generator')
        parser.add_argument('--conv_k_size', type=int, default=9,
                            help='the kernel size of convolution layer, default kernel size is 9, the kernel is one dimensional.')
        # The help text used to claim a default of 0 while the default is 0.2.
        parser.add_argument('--dropout_p', type=float, default=0.2,
                            help='probability of an element to be zeroed in a dropout layer, 0 means no dropout.')
        parser.add_argument('--leaky_slope', type=float, default=0.2,
                            help='the negative slope of the Leaky ReLU activation function')
        parser.add_argument('--latent_space_dim', type=int, default=128,
                            help='the dimensionality of the latent space')
        parser.add_argument('--seed', type=int, default=42,
                            help='random seed')
        parser.add_argument('--init_type', type=str, default='normal',
                            help='choose the method of network initialization, options: [normal | xavier_normal | xavier_uniform | kaiming_normal | kaiming_uniform | orthogonal]')
        parser.add_argument('--init_gain', type=float, default=0.02,
                            help='scaling factor for normal, xavier and orthogonal initialization methods')

        # Loss parameters
        parser.add_argument('--recon_loss', type=str, default='BCE',
                            help='chooses the reconstruction loss function, options: [BCE | MSE | L1]')
        parser.add_argument('--reduction', type=str, default='mean',
                            help='chooses the reduction to apply to the loss function, options: [sum | mean]')
        parser.add_argument('--k_kl', type=float, default=0.01,
                            help='weight for the kl loss')
        parser.add_argument('--k_embed', type=float, default=0.001,
                            help='weight for the embedding loss')

        # Other parameters
        parser.add_argument('--deterministic', action='store_true',
                            help='make the model deterministic for reproduction if set true')
        parser.add_argument('--detail', action='store_true',
                            help='print more detailed information if set true')
        parser.add_argument('--epoch_to_load', type=str, default='latest',
                            help='the epoch number to load, set latest to load latest cached model')
        parser.add_argument('--experiment_to_load', type=str, default='test',
                            help='the experiment to load')

        self.initialized = True  # set the initialized to True after we define the parameters of the project
        return parser

    def get_params(self):
        """
        Initialize our parser with basic parameters once.
        Add additional model-specific parameters.

        Returns the namespace produced by parsing the command line.
        """
        cached_parser = getattr(self, 'parser', None)
        if self.initialized and cached_parser is not None:
            # The parser was fully built by an earlier call. Rebuilding it would
            # register the arguments a second time, so simply parse again.
            return cached_parser.parse_args()

        # create a new parser object and register the predefined arguments
        parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        parser = self.initialize(parser)

        # get the basic parameters; model-specific flags are not registered yet,
        # so unknown arguments have to be tolerated at this stage
        param, _ = parser.parse_known_args()

        # modify model-related parser options
        model_param_setter = models.get_param_setter(param.model)
        parser = model_param_setter(parser, self.isTrain)

        # save and return the parser
        self.parser = parser
        return parser.parse_args()

    def print_params(self, param):
        """
        Print welcome words and command line parameters.
        Save the command line parameters in a txt file to the disk

        Parameters:
            param -- the parsed parameters; checkpoints_dir and experiment_name
                     decide where cmd_parameters.txt is written
        """
        lines = [
            '\nWelcome to OmiEmbed\nby Xiaoyu Zhang x.zhang18@imperial.ac.uk\n\n',
            '-----------------------Running Parameters-----------------------\n',
        ]
        for key, value in sorted(vars(param).items()):
            comment = ''
            default = self.parser.get_default(key)
            if value != default:
                # Only values that differ from the parser default get annotated.
                comment = '\t[default: %s]' % str(default)
            lines.append('{:>18}: {:<15}{}\n'.format(str(key), str(value), comment))
        lines.append('----------------------------------------------------------------\n')
        message = ''.join(lines)
        print(message)

        # Save the running parameters setting in the disk
        experiment_dir = os.path.join(param.checkpoints_dir, param.experiment_name)
        util.mkdir(experiment_dir)
        file_name = os.path.join(experiment_dir, 'cmd_parameters.txt')
        with open(file_name, 'w') as param_file:
            now = time.strftime('%c')
            param_file.write('{:s}\n'.format(now))
            param_file.write(message)
            param_file.write('\n')

    @staticmethod
    def _downstream_task(model_name):
        """Return the downstream task for a --model value, or raise NotImplementedError."""
        try:
            return BasicParams._DOWNSTREAM_TASKS[model_name]
        except KeyError:
            raise NotImplementedError('Model name [%s] is not recognized' % model_name) from None

    @staticmethod
    def _vae_layout(net_name):
        """Return (add_channel, ch_separate) for a --net_VAE value, or raise NotImplementedError."""
        try:
            return BasicParams._VAE_LAYOUTS[net_name]
        except KeyError:
            raise NotImplementedError('VAE model name [%s] is not recognized' % net_name) from None

    @staticmethod
    def _parse_gpu_ids(gpu_ids):
        """Turn a comma separated id string into a list of the non-negative integer ids."""
        parsed_ids = []
        for token in gpu_ids.split(','):
            token = token.strip()
            if not token:
                # An empty string or a trailing comma yields an empty token,
                # which int() would reject.
                continue
            gpu_id = int(token)
            if gpu_id >= 0:
                parsed_ids.append(gpu_id)
        return parsed_ids

    def parse(self):
        """
        Parse the parameters of our project. Set up GPU device. Print the welcome words and list parameters in the console.

        Returns the parsed parameters, which are also stored in self.param.

        Raises:
            NotImplementedError -- if the model or the VAE backbone name is not recognized
        """
        param = self.get_params()  # get the parameters to the object param
        param.isTrain = self.isTrain
        param.isTest = self.isTest

        # Print welcome words and command line parameters
        self.print_params(param)

        # Set the internal parameters
        # epoch_num: the total epoch number
        if self.isTrain:
            param.epoch_num = param.epoch_num_p1 + param.epoch_num_p2 + param.epoch_num_p3
        # downstream_task: for the classification task a labels.tsv file is needed, for the regression task a values.tsv file is needed
        param.downstream_task = self._downstream_task(param.model)
        # add_channel: add one extra dimension of channel for the input data, used for convolution layer
        # ch_separate: separate the DNA methylation matrix base on the chromosome
        param.add_channel, param.ch_separate = self._vae_layout(param.net_VAE)
        # omics_num: the number of omics types
        param.omics_num = len(param.omics_mode)

        # Set up GPU
        param.gpu_ids = self._parse_gpu_ids(param.gpu_ids)
        if len(param.gpu_ids) > 0:
            torch.cuda.set_device(param.gpu_ids[0])

        self.param = param
        return self.param