229 lines (197 with data), 26.2 kB
ÄēFf Ćsphinx.addnodesĒĆdocumentĒďĒ)ĀĒ}Ē(Ć rawsourceĒĆ ĒĆchildrenĒ]ĒĆdocutils.nodesĒĆsectionĒďĒ)ĀĒ}Ē(hhh]Ē(h ĆtitleĒďĒ)ĀĒ}Ē(hĆTutorial: Simple DeepProg modelĒh]Ēh ĆTextĒďĒĆTutorial: Simple DeepProg modelĒÖĒĀĒ}Ē(hĆTutorial: Simple DeepProg modelĒĆparentĒhubaĆ
attributesĒ}Ē(ĆidsĒ]ĒĆclassesĒ]ĒĆnamesĒ]ĒĆdupnamesĒ]ĒĆbackrefsĒ]ĒuĆtagnameĒhĆlineĒKhhĆsourceĒĆ'/home/oliver/code/SimDeep/docs/usage.mdĒubh Ć paragraphĒďĒ)ĀĒ}Ē(hĆ6The principle of DeepProg can be summarized as follow:Ēh]ĒhĆ6The principle of DeepProg can be summarized as follow:ĒÖĒĀĒ}Ē(hĆ6The principle of DeepProg can be summarized as follow:Ēhh/hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Khhhhh+h,ubh Ćbullet_listĒďĒ)ĀĒ}Ē(hhh]Ē(h Ć list_itemĒďĒ)ĀĒ}Ē(hhh]Ēh.)ĀĒ}Ē(hĆ+Loading of multiple samples x OMIC matricesĒh]ĒhĆ+Loading of multiple samples x OMIC matricesĒÖĒĀĒ}Ē(hĆ+Loading of multiple samples x OMIC matricesĒhhHhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*KhhEhhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*Khh@hhh+h,ubhD)ĀĒ}Ē(hhh]Ēh.)ĀĒ}Ē(hĆDPreprocessing ,normalisation, and sub-sampling of the input matricesĒh]ĒhĆDPreprocessing ,normalisation, and sub-sampling of the input matricesĒÖĒĀĒ}Ē(hĆDPreprocessing ,normalisation, and sub-sampling of the input matricesĒhh`hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Khh]hhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*Khh@hhh+h,ubhD)ĀĒ}Ē(hhh]Ēh.)ĀĒ}Ē(hĆ'Matrix transformation using autoencoderĒh]ĒhĆ'Matrix transformation using autoencoderĒÖĒĀĒ}Ē(hĆ'Matrix transformation using autoencoderĒhhxhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Khhuhhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*Khh@hhh+h,ubhD)ĀĒ}Ē(hhh]Ēh.)ĀĒ}Ē(hĆDetection of survival featuresĒh]ĒhĆDetection of survival featuresĒÖĒĀĒ}Ē(hĆDetection of survival featuresĒhhźhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Khhćhhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*Khh@hhh+h,ubhD)ĀĒ}Ē(hhh]Ēh.)ĀĒ}Ē(hĆ-Survival feature agglomeration and clusteringĒh]ĒhĆ-Survival feature agglomeration and clusteringĒÖĒĀĒ}Ē(hĆ-Survival feature agglomeration and clusteringĒhh®hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Khh•hhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*Khh@hhh+h,ubhD)ĀĒ}Ē(hhh]Ēh.)ĀĒ}Ē(hĆBCreation of supervised models to predict the output of new samplesĒh]ĒhĆBCreation of supervised models to predict the output of new samplesĒÖĒĀĒ}Ē(hĆBCreation of supervised models to predict the output of new samplesĒhhņhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*K hhĹhhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*K hh@hhh+h,ubeh}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h>h*Khhhhh+h,ubh)ĀĒ}Ē(hhh]Ē(h)ĀĒ}Ē(hĆInput parametersĒh]ĒhĆInput parametersĒÖĒĀĒ}Ē(hĆInput parametersĒhhřubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hh*KhhŘh+h,ubh.)ĀĒ}Ē(hĆėAll the default parameters are defined in the config file: ./simdeep/config.py but can be passed dynamically. Three types of parameters must be defined:Ēh]Ē(hĆ;All the default parameters are defined in the config file: ĒÖĒĀĒ}Ē(hĆ;All the default parameters are defined in the config file: ĒhhŪhhh+Nh*Nubh ĆliteralĒďĒ)ĀĒ}Ē(hĆ./simdeep/config.pyĒh]ĒhĆ./simdeep/config.pyĒÖĒĀĒ}Ē(hhhhÝhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hŲhhŪhhh+h,h*K ubhĆJ but can be passed dynamically. Three types of parameters must be defined:ĒÖĒĀĒ}Ē(hĆJ but can be passed dynamically. Three types of parameters must be defined:ĒhhŪhhh+Nh*Nubeh}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*K
hhŘhhh+h,ubh?)ĀĒ}Ē(hhh]Ē(hD)ĀĒ}Ē(hhh]Ē(h.)ĀĒ}Ē(hĆ3The training dataset (omics + survival input files)Ēh]ĒhĆ3The training dataset (omics + survival input files)ĒÖĒĀĒ}Ē(hĆ3The training dataset (omics + survival input files)Ēhj hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Khj hhh+h,ubh?)ĀĒ}Ē(hhh]ĒhD)ĀĒ}Ē(hhh]Ēh.)ĀĒ}Ē(hĆXIn addition, the parameters of the test set, i.e. the omic dataset and the survival fileĒh]ĒhĆXIn addition, the parameters of the test set, i.e. the omic dataset and the survival fileĒÖĒĀĒ}Ē(hĆXIn addition, the parameters of the test set, i.e. the omic dataset and the survival fileĒhj, hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Khj) hhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*Khj& hhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h>h*Khj hhh+h,ubeh}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*Khj hhh+h,ubhD)ĀĒ}Ē(hhh]Ēh.)ĀĒ}Ē(hĆ[The parameters of the autoencoder (the default parameters works but it might be fine-tuned.Ēh]ĒhĆ[The parameters of the autoencoder (the default parameters works but it might be fine-tuned.ĒÖĒĀĒ}Ē(hĆ[The parameters of the autoencoder (the default parameters works but it might be fine-tuned.ĒhjP hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*KhjM hhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*Khj hhh+h,ubhD)ĀĒ}Ē(hhh]Ēh.)ĀĒ}Ē(hĆHThe parameters of the classification procedures (default are still good)Ēh]ĒhĆHThe parameters of the classification procedures (default are still good)ĒÖĒĀĒ}Ē(hĆHThe parameters of the classification procedures (default are still good)Ēhjh hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Khje hhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*Khj hhh+h,ubeh}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h>h*KhhŘhhh+h,ubeh}Ē(h]ĒĆinput-parametersĒah!]Ēh#]ĒĆinput parametersĒah%]Ēh']Ēuh)h
h*Khhhhh+h,ubh)ĀĒ}Ē(hhh]Ē(h)ĀĒ}Ē(hĆInput matricesĒh]ĒhĆInput matricesĒÖĒĀĒ}Ē(hĆInput matricesĒhjé ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hh*Khjč h+h,ubh.)ĀĒ}Ē(hĆ&As examples, we included two datasets:Ēh]ĒhĆ&As examples, we included two datasets:ĒÖĒĀĒ}Ē(hĆ&As examples, we included two datasets:ĒhjĚ hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Khjč hhh+h,ubh?)ĀĒ}Ē(hhh]ĒhD)ĀĒ}Ē(hhh]Ēh.)ĀĒ}Ē(hĆ4A dummy example dataset in the example/data/ folder:Ēh]Ē(hĆA dummy example dataset in the ĒÖĒĀĒ}Ē(hĆA dummy example dataset in the Ēhj≤ hhh+Nh*Nubhų)ĀĒ}Ē(hĆ
example/data/Ēh]ĒhĆ
example/data/ĒÖĒĀĒ}Ē(hhhjĽ hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hŲhj≤ hhh+h,h*K ubhĆ folder:ĒÖĒĀĒ}Ē(hĆ folder:Ēhj≤ hhh+Nh*Nubeh}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*KhjĮ hhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*Khj¨ hhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h>h*Khjč hhh+h,ubh Ć
literal_blockĒďĒ)ĀĒ}Ē(hĆžexamples
‚Ēú‚ĒÄ‚ĒÄ data
‚Ē⬆¬† ‚Ēú‚ĒÄ‚ĒÄ meth_dummy.tsv
‚Ē⬆¬† ‚Ēú‚ĒÄ‚ĒÄ mir_dummy.tsv
‚Ē⬆¬† ‚Ēú‚ĒÄ‚ĒÄ rna_dummy.tsv
‚Ē⬆¬† ‚Ēú‚ĒÄ‚ĒÄ rna_test_dummy.tsv
‚Ē⬆¬† ‚Ēú‚ĒÄ‚ĒÄ survival_dummy.tsv
‚Ē⬆¬† ‚ĒĒ‚ĒÄ‚ĒÄ survival_test_dummy.tsvĒh]ĒhĆžexamples
‚Ēú‚ĒÄ‚ĒÄ data
‚Ē⬆¬† ‚Ēú‚ĒÄ‚ĒÄ meth_dummy.tsv
‚Ē⬆¬† ‚Ēú‚ĒÄ‚ĒÄ mir_dummy.tsv
‚Ē⬆¬† ‚Ēú‚ĒÄ‚ĒÄ rna_dummy.tsv
‚Ē⬆¬† ‚Ēú‚ĒÄ‚ĒÄ rna_test_dummy.tsv
‚Ē⬆¬† ‚Ēú‚ĒÄ‚ĒÄ survival_dummy.tsv
‚Ē⬆¬† ‚ĒĒ‚ĒÄ‚ĒÄ survival_test_dummy.tsvĒÖĒĀĒ}Ē(hhhj‚ ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆbashĒĆ xml:spaceĒĆpreserveĒuh)jŗ hjč hhh+h,h*K ubh?)ĀĒ}Ē(hhh]ĒhD)ĀĒ}Ē(hhh]Ēh.)ĀĒ}Ē(hĆĖAnd a real dataset in the data folder. This dataset derives from the TCGA HCC cancer dataset. This dataset needs to be decompressed before processing:Ēh]Ē(hĆAnd a real dataset in the ĒÖĒĀĒ}Ē(hĆAnd a real dataset in the Ēhjķ hhh+Nh*Nubhų)ĀĒ}Ē(hĆdataĒh]ĒhĆdataĒÖĒĀĒ}Ē(hhhj hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hŲhjķ hhh+h,h*K ubhĆx folder. This dataset derives from the TCGA HCC cancer dataset. This dataset needs to be decompressed before processing:ĒÖĒĀĒ}Ē(hĆx folder. This dataset derives from the TCGA HCC cancer dataset. This dataset needs to be decompressed before processing:Ēhjķ hhh+Nh*Nubeh}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*K#hjų hhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*K#hjŰ hhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h>h*K#hjč hhh+h,ubjŠ )ĀĒ}Ē(hĆ\data
‚Ēú‚ĒÄ‚ĒÄ meth.tsv.gz
‚Ēú‚ĒÄ‚ĒÄ mir.tsv.gz
‚Ēú‚ĒÄ‚ĒÄ rna.tsv.gz
‚ĒĒ‚ĒÄ‚ĒÄ survival.tsv
Ēh]ĒhĆ\data
‚Ēú‚ĒÄ‚ĒÄ meth.tsv.gz
‚Ēú‚ĒÄ‚ĒÄ mir.tsv.gz
‚Ēú‚ĒÄ‚ĒÄ rna.tsv.gz
‚ĒĒ‚ĒÄ‚ĒÄ survival.tsv
ĒÖĒĀĒ}Ē(hhhj( ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆbashĒjÚ jů uh)jŗ hjč hhh+h,h*K ubh.)ĀĒ}Ē(hĆ/An input matrix file should follow this format:Ēh]ĒhĆ/An input matrix file should follow this format:ĒÖĒĀĒ}Ē(hĆ/An input matrix file should follow this format:Ēhj8 hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*K.hjč hhh+h,ubjŠ )ĀĒ}Ē(hX] head mir_dummy.tsv
Samples dummy_mir_0 dummy_mir_1 dummy_mir_2 dummy_mir_3 ...
sample_test_0 0.469656032287 0.347987447237 0.706633335508 0.440068758445 ...
sample_test_1 0.0453108219657 0.0234642968791 0.593393816691 0.981872970341 ...
sample_test_2 0.908784043793 0.854397550009 0.575879144667 0.553333958713 ...
...
Ēh]ĒhX] head mir_dummy.tsv
Samples dummy_mir_0 dummy_mir_1 dummy_mir_2 dummy_mir_3 ...
sample_test_0 0.469656032287 0.347987447237 0.706633335508 0.440068758445 ...
sample_test_1 0.0453108219657 0.0234642968791 0.593393816691 0.981872970341 ...
sample_test_2 0.908784043793 0.854397550009 0.575879144667 0.553333958713 ...
...
ĒÖĒĀĒ}Ē(hhhjG ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆbashĒjÚ jů uh)jŗ hjč hhh+h,h*K ubh.)ĀĒ}Ē(hĆ[Also, if multiple matrices are used as input, they must keep the sample order. For example:Ēh]ĒhĆ[Also, if multiple matrices are used as input, they must keep the sample order. For example:ĒÖĒĀĒ}Ē(hĆ[Also, if multiple matrices are used as input, they must keep the sample order. For example:ĒhjW hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*K;hjč hhh+h,ubjŠ )ĀĒ}Ē(hXS head rna_dummy.tsv
Samples dummy_gene_0 dummy_gene_1 dummy_gene_2 dummy_gene_3 ...
sample_test_0 0.69656032287 0.47987447237 0.06633335508 0.40068758445 ...
sample_test_1 0.53108219657 0.234642968791 0.93393816691 0.81872970341 ...
sample_test_2 0.8784043793 0.54397550009 0.75879144667 0.53333958713 ...
...
Ēh]ĒhXS head rna_dummy.tsv
Samples dummy_gene_0 dummy_gene_1 dummy_gene_2 dummy_gene_3 ...
sample_test_0 0.69656032287 0.47987447237 0.06633335508 0.40068758445 ...
sample_test_1 0.53108219657 0.234642968791 0.93393816691 0.81872970341 ...
sample_test_2 0.8784043793 0.54397550009 0.75879144667 0.53333958713 ...
...
ĒÖĒĀĒ}Ē(hhhjf ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆbashĒjÚ jů uh)jŗ hjč hhh+h,h*K ubh.)ĀĒ}Ē(hĆnThe arguments training_tsv and path_data from the extract_data module are used to defined the input matrices.Ēh]Ē(hĆThe arguments ĒÖĒĀĒ}Ē(hĆThe arguments Ēhjv hhh+Nh*Nubhų)ĀĒ}Ē(hĆtraining_tsvĒh]ĒhĆtraining_tsvĒÖĒĀĒ}Ē(hhhj hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hŲhjv hhh+h,h*K ubhĆ and ĒÖĒĀĒ}Ē(hĆ and Ēhjv hhh+Nh*Nubhų)ĀĒ}Ē(hĆ path_dataĒh]ĒhĆ path_dataĒÖĒĀĒ}Ē(hhhjí hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hŲhjv hhh+h,h*K ubhĆ
from the ĒÖĒĀĒ}Ē(hĆ
from the Ēhjv hhh+Nh*Nubhų)ĀĒ}Ē(hĆextract_dataĒh]ĒhĆextract_dataĒÖĒĀĒ}Ē(hhhj• hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hŲhjv hhh+h,h*K ubhĆ/ module are used to defined the input matrices.ĒÖĒĀĒ}Ē(hĆ/ module are used to defined the input matrices.Ēhjv hhh+Nh*Nubeh}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*KHhjč hhh+h,ubjŠ )ĀĒ}Ē(hĆ»# The keys/values of this dict represent the name of the omic and the corresponding input matrix
training_tsv = {
'GE': 'rna_dummy.tsv',
'MIR': 'mir_dummy.tsv',
'METH': 'meth_dummy.tsv',
}Ēh]ĒhĆ»# The keys/values of this dict represent the name of the omic and the corresponding input matrix
training_tsv = {
'GE': 'rna_dummy.tsv',
'MIR': 'mir_dummy.tsv',
'METH': 'meth_dummy.tsv',
}ĒÖĒĀĒ}Ē(hhhjĺ ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆpythonĒjÚ jů uh)jŗ hjč hhh+h,h*K ubh.)ĀĒ}Ē(hĆ&a survival file must have this format:Ēh]ĒhĆ&a survival file must have this format:ĒÖĒĀĒ}Ē(hĆ&a survival file must have this format:Ēhjő hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*KShjč hhh+h,ubjŠ )ĀĒ}Ē(hĆĒhead survival_dummy.tsv
barcode days recurrence
sample_test_0 134 1
sample_test_1 291 0
sample_test_2 125 1
sample_test_3 43 0
...
Ēh]ĒhĆĒhead survival_dummy.tsv
barcode days recurrence
sample_test_0 134 1
sample_test_1 291 0
sample_test_2 125 1
sample_test_3 43 0
...
ĒÖĒĀĒ}Ē(hhhj› ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆbashĒjÚ jů uh)jŗ hjč hhh+h,h*K ubh.)ĀĒ}Ē(hĆéIn addition, the fields corresponding to the patient IDs, the survival time, and the event should be defined using the survival_flag argument:Ēh]Ē(hĆwIn addition, the fields corresponding to the patient IDs, the survival time, and the event should be defined using the ĒÖĒĀĒ}Ē(hĆwIn addition, the fields corresponding to the patient IDs, the survival time, and the event should be defined using the ĒhjŪ hhh+Nh*Nubhų)ĀĒ}Ē(hĆ
survival_flagĒh]ĒhĆ
survival_flagĒÖĒĀĒ}Ē(hhhjŲ hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hŲhjŪ hhh+h,h*K ubhĆ
argument:ĒÖĒĀĒ}Ē(hĆ
argument:ĒhjŪ hhh+Nh*Nubeh}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Kahjč hhh+h,ubjŠ )ĀĒ}Ē(hĆÜ#Default value
survival_flag = {'patient_id': 'barcode',
'survival': 'days',
'event': 'recurrence'}Ēh]ĒhĆÜ#Default value
survival_flag = {'patient_id': 'barcode',
'survival': 'days',
'event': 'recurrence'}ĒÖĒĀĒ}Ē(hhhj ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆpythonĒjÚ jů uh)jŗ hjč hhh+h,h*K ubeh}Ē(h]ĒĆinput-matricesĒah!]Ēh#]ĒĆinput matricesĒah%]Ēh']Ēuh)h
h*Khhhhh+h,ubh)ĀĒ}Ē(hhh]Ē(h)ĀĒ}Ē(hĆCCreating a simple DeepProg model with one autoencoder for each omicĒh]ĒhĆCCreating a simple DeepProg model with one autoencoder for each omicĒÖĒĀĒ}Ē(hĆCCreating a simple DeepProg model with one autoencoder for each omicĒhj* ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hh*Kjhj' h+h,ubh.)ĀĒ}Ē(hĆŰFirst, we will build a model using the example dataset from ./examples/data/ (These example files are set as default in the config.py file). We will use them to show how to construct a single DeepProg model inferring a autoencoder for each omicĒh]Ē(hĆ<First, we will build a model using the example dataset from ĒÖĒĀĒ}Ē(hĆ<First, we will build a model using the example dataset from Ēhj9 hhh+Nh*Nubhų)ĀĒ}Ē(hĆ./examples/data/Ēh]ĒhĆ./examples/data/ĒÖĒĀĒ}Ē(hhhjB hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hŲhj9 hhh+h,h*K ubhĆ® (These example files are set as default in the config.py file). We will use them to show how to construct a single DeepProg model inferring a autoencoder for each omicĒÖĒĀĒ}Ē(hĆ® (These example files are set as default in the config.py file). We will use them to show how to construct a single DeepProg model inferring a autoencoder for each omicĒhj9 hhh+Nh*Nubeh}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Klhj' hhh+h,ubjŠ )ĀĒ}Ē(hX"
# SimDeep class can be used to build one model with one autoencoder for each omic
from simdeep.simdeep_analysis import SimDeep
from simdeep.extract_data import LoadData
help(SimDeep) # to see all the functions
help(LoadData) # to see all the functions related to loading datasets
# Defining training datasets
from simdeep.config import TRAINING_TSV
from simdeep.config import SURVIVAL_TSV
# Location of the input matrices and survival file
from simdeep.config import PATH_DATA
dataset = LoadData(training_tsv=TRAINING_TSV,
survival_tsv=SURVIVAL_TSV,
path_data=PATH_DATA)
# Defining the result path in which will be created an output folder
PATH_RESULTS = "./TEST_DUMMY/"
# instantiate the model with the dummy example training dataset defined in the config file
simDeep = SimDeep(
dataset=dataset,
path_results=PATH_RESULTS,
path_to_save_modelPATH_RESULTS, # This result path can be used to save the autoencoder
)
simDeep.load_training_dataset() # load the training dataset
simDeep.fit() # fit the modelĒh]ĒhX"
# SimDeep class can be used to build one model with one autoencoder for each omic
from simdeep.simdeep_analysis import SimDeep
from simdeep.extract_data import LoadData
help(SimDeep) # to see all the functions
help(LoadData) # to see all the functions related to loading datasets
# Defining training datasets
from simdeep.config import TRAINING_TSV
from simdeep.config import SURVIVAL_TSV
# Location of the input matrices and survival file
from simdeep.config import PATH_DATA
dataset = LoadData(training_tsv=TRAINING_TSV,
survival_tsv=SURVIVAL_TSV,
path_data=PATH_DATA)
# Defining the result path in which will be created an output folder
PATH_RESULTS = "./TEST_DUMMY/"
# instantiate the model with the dummy example training dataset defined in the config file
simDeep = SimDeep(
dataset=dataset,
path_results=PATH_RESULTS,
path_to_save_modelPATH_RESULTS, # This result path can be used to save the autoencoder
)
simDeep.load_training_dataset() # load the training dataset
simDeep.fit() # fit the modelĒÖĒĀĒ}Ē(hhhj[ ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆpythonĒjÚ jů uh)jŗ hj' hhh+h,h*K ubh.)ĀĒ}Ē(hĆ\At that point, the model is fitted and some output files are available in the output folder:Ēh]ĒhĆ\At that point, the model is fitted and some output files are available in the output folder:ĒÖĒĀĒ}Ē(hĆ\At that point, the model is fitted and some output files are available in the output folder:Ēhjk hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*KŹhj' hhh+h,ubjŠ )ĀĒ}Ē(hĆyTEST_DUMMY
‚Ēú‚ĒÄ‚ĒÄ test_dummy_dataset_KM_plot_training_dataset.png
‚ĒĒ‚ĒÄ‚ĒÄ test_dummy_dataset_training_set_labels.tsvĒh]ĒhĆyTEST_DUMMY
‚Ēú‚ĒÄ‚ĒÄ test_dummy_dataset_KM_plot_training_dataset.png
‚ĒĒ‚ĒÄ‚ĒÄ test_dummy_dataset_training_set_labels.tsvĒÖĒĀĒ}Ē(hhhjz ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆbashĒjÚ jů uh)jŗ hj' hhh+h,h*K ubh.)ĀĒ}Ē(hĆJThe tsv file contains the label and the label probability for each sample:Ēh]ĒhĆJThe tsv file contains the label and the label probability for each sample:ĒÖĒĀĒ}Ē(hĆJThe tsv file contains the label and the label probability for each sample:Ēhjä hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Kóhj' hhh+h,ubjŠ )ĀĒ}Ē(hĆŲsample_test_0 1 7.22678272919e-12
sample_test_1 1 4.48594196888e-09
sample_test_4 1 1.53363205571e-06
sample_test_5 1 6.72170409655e-08
sample_test_6 0 0.9996581662
sample_test_7 1 3.38139255666e-08Ēh]ĒhĆŲsample_test_0 1 7.22678272919e-12
sample_test_1 1 4.48594196888e-09
sample_test_4 1 1.53363205571e-06
sample_test_5 1 6.72170409655e-08
sample_test_6 0 0.9996581662
sample_test_7 1 3.38139255666e-08ĒÖĒĀĒ}Ē(hhhjô ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆbashĒjÚ jů uh)jŗ hj' hhh+h,h*K ubh.)ĀĒ}Ē(hĆ;And we also have the visualisation of a Kaplan-Meier Curve:Ēh]ĒhĆ;And we also have the visualisation of a Kaplan-Meier Curve:ĒÖĒĀĒ}Ē(hĆ;And we also have the visualisation of a Kaplan-Meier Curve:Ēhj© hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*KĘhj' hhh+h,ubh.)ĀĒ}Ē(hĆKM plotĒh]Ēh ĆimageĒďĒ)ĀĒ}Ē(hjļ h]ĒhhÖĒĀĒ}Ē(hhhjĺ hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆuriĒĆ5./img/test_dummy_dataset_KM_plot_training_dataset.pngĒĆaltĒjļ Ć
candidatesĒ}ĒĆ*Ējň suh)jľ hjł hhh+h,h*K ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*K§hj' hhh+h,ubh.)ĀĒ}Ē(hX$ Now we are ready to use a test dataset and to infer the class label for the test samples.
The test dataset do not need to have the same input omic matrices than the training dataset and not even the sample features for a given omic. However, it needs to have at least some features in common.Ēh]Ē(hĆYNow we are ready to use a test dataset and to infer the class label for the test samples.ĒÖĒĀĒ}Ē(hĆYNow we are ready to use a test dataset and to infer the class label for the test samples.Ēhj÷ hhh+Nh*NubhĆ
ĒÖĒĀĒ}Ē(hhhj÷ hhh+Nh*NubhĆ The test dataset do not need to have the same input omic matrices than the training dataset and not even the sample features for a given omic. However, it needs to have at least some features in common.ĒÖĒĀĒ}Ē(hĆ The test dataset do not need to have the same input omic matrices than the training dataset and not even the sample features for a given omic. However, it needs to have at least some features in common.Ēhj÷ hhh+Nh*Nubeh}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*K¶hj' hhh+h,ubjŠ )ĀĒ}Ē(hXŃ # Defining test datasets
from simdeep.config import TEST_TSV
from simdeep.config import SURVIVAL_TSV_TEST
simDeep.load_new_test_dataset(
TEST_TSV,
fname_key='dummy'
SURVIVAL_TSV_TEST, # [OPTIONAL] test survival file useful to compute accuracy of test dataset
)
# The test set is a dummy rna expression (generated randomly)
print(simDeep.dataset.test_tsv) # Defined in the config file
# The data type of the test set is also defined to match an existing type
print(simDeep.dataset.data_type) # Defined in the config file
simDeep.predict_labels_on_test_dataset() # Perform the classification analysis and label the set dataset
print(simDeep.test_labels)
print(simDeep.test_labels_proba)
Ēh]ĒhXŃ # Defining test datasets
from simdeep.config import TEST_TSV
from simdeep.config import SURVIVAL_TSV_TEST
simDeep.load_new_test_dataset(
TEST_TSV,
fname_key='dummy'
SURVIVAL_TSV_TEST, # [OPTIONAL] test survival file useful to compute accuracy of test dataset
)
# The test set is a dummy rna expression (generated randomly)
print(simDeep.dataset.test_tsv) # Defined in the config file
# The data type of the test set is also defined to match an existing type
print(simDeep.dataset.data_type) # Defined in the config file
simDeep.predict_labels_on_test_dataset() # Perform the classification analysis and label the set dataset
print(simDeep.test_labels)
print(simDeep.test_labels_proba)
ĒÖĒĀĒ}Ē(hhhjÓ ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆpythonĒjÚ jů uh)jŗ hj' hhh+h,h*K ubh.)ĀĒ}Ē(hĆgThe assigned class and class probabilities for the test samples are now available in the output folder:Ēh]ĒhĆgThe assigned class and class probabilities for the test samples are now available in the output folder:ĒÖĒĀĒ}Ē(hĆgThe assigned class and class probabilities for the test samples are now available in the output folder:ĒhjĢ hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Kņhj' hhh+h,ubjŠ )ĀĒ}Ē(hX TEST_DUMMY
‚Ēú‚ĒÄ‚ĒÄ test_dummy_dataset_dummy_KM_plot_test.png
‚Ēú‚ĒÄ‚ĒÄ test_dummy_dataset_dummy_test_labels.tsv
‚Ēú‚ĒÄ‚ĒÄ test_dummy_dataset_KM_plot_training_dataset.png
‚ĒĒ‚ĒÄ‚ĒÄ test_dummy_dataset_training_set_labels.tsv
head test_dummy_dataset_training_set_labels.tsv
Ēh]ĒhX TEST_DUMMY
‚Ēú‚ĒÄ‚ĒÄ test_dummy_dataset_dummy_KM_plot_test.png
‚Ēú‚ĒÄ‚ĒÄ test_dummy_dataset_dummy_test_labels.tsv
‚Ēú‚ĒÄ‚ĒÄ test_dummy_dataset_KM_plot_training_dataset.png
‚ĒĒ‚ĒÄ‚ĒÄ test_dummy_dataset_training_set_labels.tsv
head test_dummy_dataset_training_set_labels.tsv
ĒÖĒĀĒ}Ē(hhhj
ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆbashĒjÚ jů uh)jŗ hj' hhh+h,h*K ubh.)ĀĒ}Ē(hĆ7And a KM plot is also constructed using the test labelsĒh]ĒhĆ7And a KM plot is also constructed using the test labelsĒÖĒĀĒ}Ē(hĆ7And a KM plot is also constructed using the test labelsĒhj hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*KŌhj' hhh+h,ubh.)ĀĒ}Ē(hĆKM plot testĒh]ĒjĹ )ĀĒ}Ē(hj. h]ĒhhÖĒĀĒ}Ē(hhhj0 hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēj Ć/./img/test_dummy_dataset_dummy_KM_plot_test.pngĒjŐ j. jÕ }ĒjŌ j< suh)jľ hj, hhh+h,h*K ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*K—hj' hhh+h,ubh.)ĀĒ}Ē(hĆ0Finally, it is possible to save the keras model:Ēh]ĒhĆ0Finally, it is possible to save the keras model:ĒÖĒĀĒ}Ē(hĆ0Finally, it is possible to save the keras model:ĒhjD hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*K”hj' hhh+h,ubjŠ )ĀĒ}Ē(hĆ)simDeep.save_encoders('dummy_encoder.h5')Ēh]ĒhĆ)simDeep.save_encoders('dummy_encoder.h5')ĒÖĒĀĒ}Ē(hhhjS ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆpythonĒjÚ jů uh)jŗ hj' hhh+h,h*K ubeh}Ē(h]ĒĆCcreating-a-simple-deepprog-model-with-one-autoencoder-for-each-omicĒah!]Ēh#]ĒĆCcreating a simple deepprog model with one autoencoder for each omicĒah%]Ēh']Ēuh)h
h*Kjhhhhh+h,ubeh}Ē(h]ĒĆtutorial-simple-deepprog-modelĒah!]Ēh#]ĒĆtutorial: simple deepprog modelĒah%]Ēh']Ēuh)h
h*Khhhhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆsourceĒh,uh)hĆcurrent_sourceĒNĆcurrent_lineĒNĆsettingsĒĆdocutils.frontendĒĆValuesĒďĒ)ĀĒ}Ē(hNĆ generatorĒNĆ datestampĒNĆsource_linkĒNĆ
source_urlĒNĆ
toc_backlinksĒĆentryĒĆfootnote_backlinksĒKĆ
sectnum_xformĒKĆstrip_commentsĒNĆstrip_elements_with_classesĒNĆ
strip_classesĒNĆreport_levelĒKĆ
halt_levelĒKĆexit_status_levelĒKĆdebugĒNĆwarning_streamĒNĆ tracebackĒąĆinput_encodingĒĆ utf-8-sigĒĆinput_encoding_error_handlerĒĆstrictĒĆoutput_encodingĒĆutf-8ĒĆoutput_encoding_error_handlerĒjĖ Ćerror_encodingĒĆutf-8ĒĆerror_encoding_error_handlerĒĆbackslashreplaceĒĆ
language_codeĒĆenĒĆrecord_dependenciesĒNĆconfigĒNĆ id_prefixĒhĆauto_id_prefixĒĆidĒĆ
dump_settingsĒNĆdump_internalsĒNĆdump_transformsĒNĆdump_pseudo_xmlĒNĆexpose_internalsĒNĆstrict_visitorĒNĆ_disable_configĒNĆ_sourceĒh,Ć_destinationĒNĆ
_config_filesĒ]ĒĆpep_referencesĒNĆpep_base_urlĒĆ https://www.python.org/dev/peps/ĒĆpep_file_url_templateĒĆpep-%04dĒĆrfc_referencesĒNĆrfc_base_urlĒĆhttps://tools.ietf.org/html/ĒĆ tab_widthĒKĆtrim_footnote_reference_spaceĒČĆfile_insertion_enabledĒąĆraw_enabledĒKĆsyntax_highlightĒĆlongĒĆsmart_quotesĒąĆsmartquotes_localesĒ]ĒĆcharacter_level_inline_markupĒČĆdoctitle_xformĒČĆ
docinfo_xformĒKĆsectsubtitle_xformĒČĆembed_stylesheetĒČĆcloak_email_addressesĒąĆenvĒNubĆreporterĒNĆindirect_targetsĒ]ĒĆsubstitution_defsĒ}ĒĆsubstitution_namesĒ}ĒĆrefnamesĒ}ĒĆrefidsĒ}ĒĆnameidsĒ}Ē(jp jm ją jÖ j$ j! jh je uĆ nametypesĒ}Ē(jp Nją Nj$ Njh Nuh}Ē(jm hjÖ hŘj! jč je j' uĆ
footnote_refsĒ}ĒĆ
citation_refsĒ}ĒĆ
autofootnotesĒ]ĒĆautofootnote_refsĒ]ĒĆsymbol_footnotesĒ]ĒĆsymbol_footnote_refsĒ]ĒĆ footnotesĒ]ĒĆ citationsĒ]ĒĆautofootnote_startĒKĆsymbol_footnote_startĒK Ć
id_counterĒĆcollectionsĒĆCounterĒďĒ}ĒÖĒRĒĆparse_messagesĒ]ĒĆtransform_messagesĒ]ĒĆtransformerĒNĆ
decorationĒNhhub.