a b/params/basic_params.py
1
import time
2
import argparse
3
import torch
4
import os
5
import models
6
from util import util
7
8
9
class BasicParams:
    """
    Define, parse and post-process the console parameters shared by every run.

    Typical usage is ``param = BasicParams().parse()``. The methods ``initialize``,
    ``get_params`` and ``print_params`` are the individual steps that ``parse`` chains together.
    """

    # Model name -> value stored in param.downstream_task
    _DOWNSTREAM_TASKS = {
        'vae_classifier': 'classification',
        'vae_regression': 'regression',
        'vae_survival': 'survival',
        'vae_multitask': 'multitask',
        'vae_multitask_gn': 'multitask',
        'vae_alltask': 'alltask',
        'vae_alltask_gn': 'alltask',
    }

    # VAE backbone name -> (add_channel, ch_separate)
    #   add_channel: add one extra dimension of channel for the input data, used for convolution layer
    #   ch_separate: separate the DNA methylation matrix base on the chromosome
    _VAE_INPUT_LAYOUT = {
        'conv_1d': (True, False),
        'fc_sep': (False, True),
        'fc': (False, False),
    }

    def __init__(self):
        """
        Reset the class. Indicates the class hasn't been initialized
        """
        self.initialized = False
        # The fully configured parser is stored here by get_params() so that print_params()
        # and later get_params() calls can reuse it.
        self.parser = None
        self.isTrain = True
        self.isTest = True

    def initialize(self, parser):
        """
        Define the common console parameters

        Parameters:
            parser -- an argparse.ArgumentParser (or compatible object) to add the options to

        Returns the same parser object and marks this instance as initialized.
        """
        parser.add_argument('--gpu_ids', type=str, default='0',
                            help='which GPU would like to use: e.g. 0 or 0,1, -1 for CPU')
        parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints',
                            help='models, settings and intermediate results are saved in folder in this directory')
        parser.add_argument('--experiment_name', type=str, default='test',
                            help='name of the folder in the checkpoint directory')

        # Dataset parameters
        parser.add_argument('--omics_mode', type=str, default='a',
                            help='omics types would like to use in the model, options: [abc | ab | a | b | c]')
        parser.add_argument('--data_root', type=str, default='./data',
                            help='path to input data')
        parser.add_argument('--batch_size', type=int, default=32,
                            help='input data batch size')
        parser.add_argument('--num_threads', default=0, type=int,
                            help='number of threads for loading data')
        parser.add_argument('--set_pin_memory', action='store_true',
                            help='set pin_memory in the dataloader to increase data loading performance')
        parser.add_argument('--not_stratified', action='store_true',
                            help='do not apply the stratified mode in train/test split if set true')
        parser.add_argument('--use_sample_list', action='store_true',
                            help='provide a subset sample list of the dataset, store in the path data_root/sample_list.tsv, if False use all the samples')
        parser.add_argument('--use_feature_lists', action='store_true',
                            help='provide feature lists of the input omics data, e.g. data_root/feature_list_A.tsv, if False use all the features')
        parser.add_argument('--detect_na', action='store_true',
                            help='detect missing value markers during data loading, stay False can improve the loading performance')
        parser.add_argument('--file_format', type=str, default='tsv',
                            help='file format of the omics data, options: [tsv | csv | hdf]')

        # Model parameters
        parser.add_argument('--model', type=str, default='vae_classifier',
                            help='chooses which model want to use, options: [vae_classifier | vae_regression | vae_survival | vae_multitask]')
        # The help text no longer claims conv_1d is the default: the real default is fc_sep
        parser.add_argument('--net_VAE', type=str, default='fc_sep',
                            help='specify the backbone of the VAE, options: [conv_1d | fc_sep | fc]')
        parser.add_argument('--net_down', type=str, default='multi_FC_classifier',
                            help='specify the backbone of the downstream task network, default is the multi-layer FC classifier, options: [multi_FC_classifier | multi_FC_regression | multi_FC_survival | multi_FC_multitask]')
        parser.add_argument('--norm_type', type=str, default='batch',
                            help='the type of normalization applied to the model, default to use batch normalization, options: [batch | instance | none ]')
        parser.add_argument('--filter_num', type=int, default=8,
                            help='number of filters in the last convolution layer in the generator')
        parser.add_argument('--conv_k_size', type=int, default=9,
                            help='the kernel size of convolution layer, default kernel size is 9, the kernel is one dimensional.')
        # The help text no longer claims the default is 0: the real default is 0.2
        parser.add_argument('--dropout_p', type=float, default=0.2,
                            help='probability of an element to be zeroed in a dropout layer, 0 means no dropout.')
        parser.add_argument('--leaky_slope', type=float, default=0.2,
                            help='the negative slope of the Leaky ReLU activation function')
        parser.add_argument('--latent_space_dim', type=int, default=128,
                            help='the dimensionality of the latent space')
        parser.add_argument('--seed', type=int, default=42,
                            help='random seed')
        parser.add_argument('--init_type', type=str, default='normal',
                            help='choose the method of network initialization, options: [normal | xavier_normal | xavier_uniform | kaiming_normal | kaiming_uniform | orthogonal]')
        parser.add_argument('--init_gain', type=float, default=0.02,
                            help='scaling factor for normal, xavier and orthogonal initialization methods')

        # Loss parameters
        parser.add_argument('--recon_loss', type=str, default='BCE',
                            help='chooses the reconstruction loss function, options: [BCE | MSE | L1]')
        parser.add_argument('--reduction', type=str, default='mean',
                            help='chooses the reduction to apply to the loss function, options: [sum | mean]')
        parser.add_argument('--k_kl', type=float, default=0.01,
                            help='weight for the kl loss')
        parser.add_argument('--k_embed', type=float, default=0.001,
                            help='weight for the embedding loss')

        # Other parameters
        parser.add_argument('--deterministic', action='store_true',
                            help='make the model deterministic for reproduction if set true')
        parser.add_argument('--detail', action='store_true',
                            help='print more detailed information if set true')
        parser.add_argument('--epoch_to_load', type=str, default='latest',
                            help='the epoch number to load, set latest to load latest cached model')
        parser.add_argument('--experiment_to_load', type=str, default='test',
                            help='the experiment to load')

        self.initialized = True  # set the initialized to True after we define the parameters of the project
        return parser

    def get_params(self):
        """
        Initialize our parser with basic parameters once.
        Add additional model-specific parameters.

        Returns the argparse namespace parsed from the command line (sys.argv).
        """
        stored_parser = getattr(self, 'parser', None)
        if self.initialized and stored_parser is not None:
            # A complete parser already exists. Reuse it as is: running the model-specific
            # setter on it again would register the same options twice.
            return stored_parser.parse_args()

        # No usable parser yet: create one and fill it with the predefined arguments
        parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        parser = self.initialize(parser)

        # First pass ignores unknown options; it is only needed to read the model name
        param, _ = parser.parse_known_args()

        # modify model-related parser options
        model_param_setter = models.get_param_setter(param.model)
        parser = model_param_setter(parser, self.isTrain)

        # save and return the parser
        self.parser = parser
        return parser.parse_args()

    def print_params(self, param):
        """
        Print welcome words and command line parameters.
        Save the command line parameters in a txt file to the disk

        Parameters:
            param -- the parsed parameter namespace; checkpoints_dir and experiment_name are read
                     from it to build the output folder

        The text is written to <checkpoints_dir>/<experiment_name>/cmd_parameters.txt.
        """
        parser = getattr(self, 'parser', None)

        lines = [
            '\nWelcome to OmiEmbed\nby Xiaoyu Zhang x.zhang18@imperial.ac.uk\n\n',
            '-----------------------Running Parameters-----------------------\n',
        ]
        for key, value in sorted(vars(param).items()):
            comment = ''
            # Defaults can only be looked up when a parser is stored; without one the
            # parameter is listed without annotation.
            if parser is not None:
                default = parser.get_default(key)
                if value != default:
                    comment = '\t[default: %s]' % str(default)
            lines.append('{:>18}: {:<15}{}\n'.format(str(key), str(value), comment))
        lines.append('----------------------------------------------------------------\n')
        message = ''.join(lines)
        print(message)

        # Save the running parameters setting in the disk
        experiment_dir = os.path.join(param.checkpoints_dir, param.experiment_name)
        util.mkdir(experiment_dir)  # presumably creates the folder if missing -- helper is defined outside this file
        file_name = os.path.join(experiment_dir, 'cmd_parameters.txt')
        with open(file_name, 'w') as param_file:
            now = time.strftime('%c')
            param_file.write('{:s}\n'.format(now))
            param_file.write(message)
            param_file.write('\n')

    @staticmethod
    def _parse_gpu_ids(gpu_ids):
        """
        Convert a comma separated GPU id string into a list of non-negative ints.

        Negative ids (e.g. -1, meaning CPU) are dropped and blank tokens such as the one
        produced by a trailing comma are ignored. A non-integer token raises ValueError.
        """
        parsed_ids = []
        for token in gpu_ids.split(','):
            token = token.strip()
            if not token:
                continue
            gpu_id = int(token)
            if gpu_id >= 0:
                parsed_ids.append(gpu_id)
        return parsed_ids

    def parse(self):
        """
        Parse the parameters of our project. Set up GPU device. Print the welcome words and list parameters in the console.

        Returns the parameter namespace, which is also stored in self.param.

        Raises:
            NotImplementedError -- if param.model or param.net_VAE is not a recognized name
            ValueError -- if --gpu_ids contains a token that is not an integer
        """
        param = self.get_params()  # get the parameters to the object param
        param.isTrain = self.isTrain
        param.isTest = self.isTest

        # Print welcome words and command line parameters
        self.print_params(param)

        # Set the internal parameters
        # epoch_num: the total epoch number
        if self.isTrain:
            # epoch_num_p1/p2/p3 are not defined in this class; presumably they are added by the
            # training-specific parameters
            param.epoch_num = param.epoch_num_p1 + param.epoch_num_p2 + param.epoch_num_p3

        # downstream_task: for the classification task a labels.tsv file is needed, for the regression task a values.tsv file is needed
        if param.model not in self._DOWNSTREAM_TASKS:
            raise NotImplementedError('Model name [%s] is not recognized' % param.model)
        param.downstream_task = self._DOWNSTREAM_TASKS[param.model]

        # add_channel / ch_separate depend on the chosen VAE backbone
        if param.net_VAE not in self._VAE_INPUT_LAYOUT:
            raise NotImplementedError('VAE model name [%s] is not recognized' % param.net_VAE)
        param.add_channel, param.ch_separate = self._VAE_INPUT_LAYOUT[param.net_VAE]

        # omics_num: the number of omics types (one character per omics type in omics_mode)
        param.omics_num = len(param.omics_mode)

        # Set up GPU: replace the raw string by a list of ints and select the first listed device
        param.gpu_ids = self._parse_gpu_ids(param.gpu_ids)
        if len(param.gpu_ids) > 0:
            torch.cuda.set_device(param.gpu_ids[0])

        self.param = param
        return self.param