[53737a]: / examples / create_autoencoder_from_scratch.py

Download this file

64 lines (49 with data), 1.8 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
"""
Create a new autoencoder model from scratch using user-defined .tsv input files
"""
from simdeep.extract_data import LoadData
from simdeep.simdeep_analysis import SimDeep
from simdeep.config import PATH_DATA
from simdeep.config import PATH_TO_SAVE_MODEL
def main():
    """
    Run the example end to end: point SimDeep at the dummy omics files,
    fit the model, label the full dataset and save the encoders.
    """
    # Folder holding the .tsv inputs. The default from config.py is used
    # here, but any other folder can be substituted.
    data_dir = PATH_DATA
    print('path to access the .tsv files: ' + data_dir)

    # Folder in which the autoencoder will be saved.
    model_dir = PATH_TO_SAVE_MODEL
    print('path where the models will be saved:' + model_dir)

    # Only two omics are combined to build the autoencoder: RNA and MIR.
    # These are the dummy files shipped with the examples; they are
    # expected to sit inside the data folder defined above.
    omics_files = {'RNA': 'rna_dummy.tsv', 'MIR': 'mir_dummy.tsv'}

    # Loader in charge of reading and preparing the data. It also receives
    # the survival file and the (optional) metadata file.
    loader = LoadData(
        path_data=data_dir,
        training_tsv=omics_files,
        survival_tsv='survival_dummy.tsv',
        metadata_tsv="metadata_dummy.tsv",  # Optional
    )

    model = SimDeep(dataset=loader, path_to_save_model=model_dir, seed=2020)

    # The training dataset has to be loaded before the model is built.
    model.load_training_dataset()
    # Model construction.
    model.fit()
    # Predict labels on the full dataset.
    model.predict_labels_on_full_dataset()
    # Last step: save the model's encoders.
    model.save_encoders('encoder_example.h5')
# Run the example only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()