[53737a]: / docs / _build / doctrees / usage.doctree

Download this file

229 lines (197 with data), 26.2 kB

ÄēFfĆsphinx.addnodesĒĆdocumentĒďĒ)ĀĒ}Ē(Ć	rawsourceĒĆĒĆchildrenĒ]ĒĆdocutils.nodesĒĆsectionĒďĒ)ĀĒ}Ē(hhh]Ē(h	ĆtitleĒďĒ)ĀĒ}Ē(hĆTutorial: Simple DeepProg modelĒh]Ēh	ĆTextĒďĒĆTutorial: Simple DeepProg modelĒÖĒĀĒ}Ē(hĆTutorial: Simple DeepProg modelĒĆparentĒhubaĆ
attributesĒ}Ē(ĆidsĒ]ĒĆclassesĒ]ĒĆnamesĒ]ĒĆdupnamesĒ]ĒĆbackrefsĒ]ĒuĆtagnameĒhĆlineĒKhhĆsourceĒĆ'/home/oliver/code/SimDeep/docs/usage.mdĒubh	Ć	paragraphĒďĒ)ĀĒ}Ē(hĆ6The principle of DeepProg can be summarized as follow:Ēh]ĒhĆ6The principle of DeepProg can be summarized as follow:ĒÖĒĀĒ}Ē(hĆ6The principle of DeepProg can be summarized as follow:Ēhh/hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Khhhhh+h,ubh	Ćbullet_listĒďĒ)ĀĒ}Ē(hhh]Ē(h	Ć	list_itemĒďĒ)ĀĒ}Ē(hhh]Ēh.)ĀĒ}Ē(hĆ+Loading of multiple samples x OMIC matricesĒh]ĒhĆ+Loading of multiple samples x OMIC matricesĒÖĒĀĒ}Ē(hĆ+Loading of multiple samples x OMIC matricesĒhhHhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*KhhEhhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*Khh@hhh+h,ubhD)ĀĒ}Ē(hhh]Ēh.)ĀĒ}Ē(hĆDPreprocessing ,normalisation, and sub-sampling of the input matricesĒh]ĒhĆDPreprocessing ,normalisation, and sub-sampling of the input matricesĒÖĒĀĒ}Ē(hĆDPreprocessing ,normalisation, and sub-sampling of the input matricesĒhh`hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Khh]hhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*Khh@hhh+h,ubhD)ĀĒ}Ē(hhh]Ēh.)ĀĒ}Ē(hĆ'Matrix transformation using autoencoderĒh]ĒhĆ'Matrix transformation using autoencoderĒÖĒĀĒ}Ē(hĆ'Matrix transformation using autoencoderĒhhxhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Khhuhhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*Khh@hhh+h,ubhD)ĀĒ}Ē(hhh]Ēh.)ĀĒ}Ē(hĆDetection of survival featuresĒh]ĒhĆDetection of survival featuresĒÖĒĀĒ}Ē(hĆDetection of survival featuresĒhhźhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Khhćhhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*Khh@hhh+h,ubhD)ĀĒ}Ē(hhh]Ēh.)ĀĒ}Ē(hĆ-Survival feature agglomeration and clusteringĒh]ĒhĆ-Survival feature agglomeration and clusteringĒÖĒĀĒ}Ē(hĆ-Survival feature agglomeration and clusteringĒhh®hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Khh•hhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*Khh@hhh+h,ubhD)ĀĒ}Ē(hhh]Ēh.)ĀĒ}Ē(hĆBCreation of supervised models to predict the output of new samplesĒh]ĒhĆBCreation of supervised models to predict the output of new samplesĒÖĒĀĒ}Ē(hĆBCreation of supervised models to predict the output of new samplesĒhhņhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*K	hhĹhhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*K	hh@hhh+h,ubeh}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h>h*Khhhhh+h,ubh)ĀĒ}Ē(hhh]Ē(h)ĀĒ}Ē(hĆInput parametersĒh]ĒhĆInput parametersĒÖĒĀĒ}Ē(hĆInput parametersĒhhřubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hh*KhhŘh+h,ubh.)ĀĒ}Ē(hĆėAll the default parameters are defined in the config file: ./simdeep/config.py but can be passed dynamically. Three types of parameters must be defined:Ēh]Ē(hĆ;All the default parameters are defined in the config file: ĒÖĒĀĒ}Ē(hĆ;All the default parameters are defined in the config file: ĒhhŪhhh+Nh*Nubh	ĆliteralĒďĒ)ĀĒ}Ē(hĆ./simdeep/config.pyĒh]ĒhĆ./simdeep/config.pyĒÖĒĀĒ}Ē(hhhhÝhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hŲhhŪhhh+h,h*KubhĆJ but can be passed dynamically. Three types of parameters must be defined:ĒÖĒĀĒ}Ē(hĆJ but can be passed dynamically. Three types of parameters must be defined:ĒhhŪhhh+Nh*Nubeh}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*K
hhŘhhh+h,ubh?)ĀĒ}Ē(hhh]Ē(hD)ĀĒ}Ē(hhh]Ē(h.)ĀĒ}Ē(hĆ3The training dataset (omics + survival input files)Ēh]ĒhĆ3The training dataset (omics + survival input files)ĒÖĒĀĒ}Ē(hĆ3The training dataset (omics + survival input files)Ēhjhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Khjhhh+h,ubh?)ĀĒ}Ē(hhh]ĒhD)ĀĒ}Ē(hhh]Ēh.)ĀĒ}Ē(hĆXIn addition, the parameters of the test set, i.e. the omic dataset and the survival fileĒh]ĒhĆXIn addition, the parameters of the test set, i.e. the omic dataset and the survival fileĒÖĒĀĒ}Ē(hĆXIn addition, the parameters of the test set, i.e. the omic dataset and the survival fileĒhj,hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Khj)hhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*Khj&hhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h>h*Khjhhh+h,ubeh}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*Khjhhh+h,ubhD)ĀĒ}Ē(hhh]Ēh.)ĀĒ}Ē(hĆ[The parameters of the autoencoder (the default parameters works but it might be fine-tuned.Ēh]ĒhĆ[The parameters of the autoencoder (the default parameters works but it might be fine-tuned.ĒÖĒĀĒ}Ē(hĆ[The parameters of the autoencoder (the default parameters works but it might be fine-tuned.ĒhjPhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*KhjMhhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*Khjhhh+h,ubhD)ĀĒ}Ē(hhh]Ēh.)ĀĒ}Ē(hĆHThe parameters of the classification procedures (default are still good)Ēh]ĒhĆHThe parameters of the classification procedures (default are still good)ĒÖĒĀĒ}Ē(hĆHThe parameters of the classification procedures (default are still good)Ēhjhhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Khjehhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*Khjhhh+h,ubeh}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h>h*KhhŘhhh+h,ubeh}Ē(h]ĒĆinput-parametersĒah!]Ēh#]ĒĆinput parametersĒah%]Ēh']Ēuh)h
h*Khhhhh+h,ubh)ĀĒ}Ē(hhh]Ē(h)ĀĒ}Ē(hĆInput matricesĒh]ĒhĆInput matricesĒÖĒĀĒ}Ē(hĆInput matricesĒhjéubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hh*Khjčh+h,ubh.)ĀĒ}Ē(hĆ&As examples, we included two datasets:Ēh]ĒhĆ&As examples, we included two datasets:ĒÖĒĀĒ}Ē(hĆ&As examples, we included two datasets:ĒhjĚhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Khjčhhh+h,ubh?)ĀĒ}Ē(hhh]ĒhD)ĀĒ}Ē(hhh]Ēh.)ĀĒ}Ē(hĆ4A dummy example dataset in the example/data/ folder:Ēh]Ē(hĆA dummy example dataset in the ĒÖĒĀĒ}Ē(hĆA dummy example dataset in the Ēhj≤hhh+Nh*Nubhų)ĀĒ}Ē(hĆ
example/data/Ēh]ĒhĆ
example/data/ĒÖĒĀĒ}Ē(hhhjĽhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hŲhj≤hhh+h,h*KubhĆ folder:ĒÖĒĀĒ}Ē(hĆ folder:Ēhj≤hhh+Nh*Nubeh}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*KhjĮhhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*Khj¨hhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h>h*Khjčhhh+h,ubh	Ć
literal_blockĒďĒ)ĀĒ}Ē(hĆžexamples
‚Ēú‚ĒÄ‚ĒÄ data
‚Ē⬆¬† ‚Ēú‚ĒÄ‚ĒÄ meth_dummy.tsv
‚Ē⬆¬† ‚Ēú‚ĒÄ‚ĒÄ mir_dummy.tsv
‚Ē⬆¬† ‚Ēú‚ĒÄ‚ĒÄ rna_dummy.tsv
‚Ē⬆¬† ‚Ēú‚ĒÄ‚ĒÄ rna_test_dummy.tsv
‚Ē⬆¬† ‚Ēú‚ĒÄ‚ĒÄ survival_dummy.tsv
‚Ē⬆¬† ‚ĒĒ‚ĒÄ‚ĒÄ survival_test_dummy.tsvĒh]ĒhĆžexamples
‚Ēú‚ĒÄ‚ĒÄ data
‚Ē⬆¬† ‚Ēú‚ĒÄ‚ĒÄ meth_dummy.tsv
‚Ē⬆¬† ‚Ēú‚ĒÄ‚ĒÄ mir_dummy.tsv
‚Ē⬆¬† ‚Ēú‚ĒÄ‚ĒÄ rna_dummy.tsv
‚Ē⬆¬† ‚Ēú‚ĒÄ‚ĒÄ rna_test_dummy.tsv
‚Ē⬆¬† ‚Ēú‚ĒÄ‚ĒÄ survival_dummy.tsv
‚Ē⬆¬† ‚ĒĒ‚ĒÄ‚ĒÄ survival_test_dummy.tsvĒÖĒĀĒ}Ē(hhhj‚ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆbashĒĆ	xml:spaceĒĆpreserveĒuh)jŗhjčhhh+h,h*Kubh?)ĀĒ}Ē(hhh]ĒhD)ĀĒ}Ē(hhh]Ēh.)ĀĒ}Ē(hĆĖAnd a real dataset in the data folder. This dataset derives from the TCGA HCC cancer dataset. This dataset needs to be decompressed before processing:Ēh]Ē(hĆAnd a real dataset in the ĒÖĒĀĒ}Ē(hĆAnd a real dataset in the Ēhjķhhh+Nh*Nubhų)ĀĒ}Ē(hĆdataĒh]ĒhĆdataĒÖĒĀĒ}Ē(hhhjhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hŲhjķhhh+h,h*KubhĆx folder. This dataset derives from the TCGA HCC cancer dataset. This dataset needs to be decompressed before processing:ĒÖĒĀĒ}Ē(hĆx folder. This dataset derives from the TCGA HCC cancer dataset. This dataset needs to be decompressed before processing:Ēhjķhhh+Nh*Nubeh}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*K#hjųhhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hCh*K#hjŰhhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h>h*K#hjčhhh+h,ubjŠ)ĀĒ}Ē(hĆ\data
‚Ēú‚ĒÄ‚ĒÄ meth.tsv.gz
‚Ēú‚ĒÄ‚ĒÄ mir.tsv.gz
‚Ēú‚ĒÄ‚ĒÄ rna.tsv.gz
‚ĒĒ‚ĒÄ‚ĒÄ survival.tsv
Ēh]ĒhĆ\data
‚Ēú‚ĒÄ‚ĒÄ meth.tsv.gz
‚Ēú‚ĒÄ‚ĒÄ mir.tsv.gz
‚Ēú‚ĒÄ‚ĒÄ rna.tsv.gz
‚ĒĒ‚ĒÄ‚ĒÄ survival.tsv
ĒÖĒĀĒ}Ē(hhhj(ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆbashĒjÚjůuh)jŗhjčhhh+h,h*Kubh.)ĀĒ}Ē(hĆ/An input matrix file should follow this format:Ēh]ĒhĆ/An input matrix file should follow this format:ĒÖĒĀĒ}Ē(hĆ/An input matrix file should follow this format:Ēhj8hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*K.hjčhhh+h,ubjŠ)ĀĒ}Ē(hX]head mir_dummy.tsv

Samples        dummy_mir_0     dummy_mir_1     dummy_mir_2     dummy_mir_3 ...
sample_test_0  0.469656032287  0.347987447237  0.706633335508  0.440068758445 ...
sample_test_1  0.0453108219657 0.0234642968791 0.593393816691  0.981872970341 ...
sample_test_2  0.908784043793  0.854397550009  0.575879144667  0.553333958713 ...
...
Ēh]ĒhX]head mir_dummy.tsv

Samples        dummy_mir_0     dummy_mir_1     dummy_mir_2     dummy_mir_3 ...
sample_test_0  0.469656032287  0.347987447237  0.706633335508  0.440068758445 ...
sample_test_1  0.0453108219657 0.0234642968791 0.593393816691  0.981872970341 ...
sample_test_2  0.908784043793  0.854397550009  0.575879144667  0.553333958713 ...
...
ĒÖĒĀĒ}Ē(hhhjGubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆbashĒjÚjůuh)jŗhjčhhh+h,h*Kubh.)ĀĒ}Ē(hĆ[Also, if multiple matrices are used as input, they must keep the sample order. For example:Ēh]ĒhĆ[Also, if multiple matrices are used as input, they must keep the sample order. For example:ĒÖĒĀĒ}Ē(hĆ[Also, if multiple matrices are used as input, they must keep the sample order. For example:ĒhjWhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*K;hjčhhh+h,ubjŠ)ĀĒ}Ē(hXShead rna_dummy.tsv

Samples        dummy_gene_0     dummy_gene_1     dummy_gene_2     dummy_gene_3 ...
sample_test_0  0.69656032287  0.47987447237  0.06633335508  0.40068758445 ...
sample_test_1  0.53108219657 0.234642968791 0.93393816691  0.81872970341 ...
sample_test_2  0.8784043793  0.54397550009  0.75879144667  0.53333958713 ...
...
Ēh]ĒhXShead rna_dummy.tsv

Samples        dummy_gene_0     dummy_gene_1     dummy_gene_2     dummy_gene_3 ...
sample_test_0  0.69656032287  0.47987447237  0.06633335508  0.40068758445 ...
sample_test_1  0.53108219657 0.234642968791 0.93393816691  0.81872970341 ...
sample_test_2  0.8784043793  0.54397550009  0.75879144667  0.53333958713 ...
...
ĒÖĒĀĒ}Ē(hhhjfubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆbashĒjÚjůuh)jŗhjčhhh+h,h*Kubh.)ĀĒ}Ē(hĆnThe  arguments training_tsv and path_data from the extract_data module are used to defined the input matrices.Ēh]Ē(hĆThe  arguments ĒÖĒĀĒ}Ē(hĆThe  arguments Ēhjvhhh+Nh*Nubhų)ĀĒ}Ē(hĆtraining_tsvĒh]ĒhĆtraining_tsvĒÖĒĀĒ}Ē(hhhjhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hŲhjvhhh+h,h*KubhĆ and ĒÖĒĀĒ}Ē(hĆ and Ēhjvhhh+Nh*Nubhų)ĀĒ}Ē(hĆ	path_dataĒh]ĒhĆ	path_dataĒÖĒĀĒ}Ē(hhhjíhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hŲhjvhhh+h,h*KubhĆ
 from the ĒÖĒĀĒ}Ē(hĆ
 from the Ēhjvhhh+Nh*Nubhų)ĀĒ}Ē(hĆextract_dataĒh]ĒhĆextract_dataĒÖĒĀĒ}Ē(hhhj•hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hŲhjvhhh+h,h*KubhĆ/ module are used to defined the input matrices.ĒÖĒĀĒ}Ē(hĆ/ module are used to defined the input matrices.Ēhjvhhh+Nh*Nubeh}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*KHhjčhhh+h,ubjŠ)ĀĒ}Ē(hĆ»# The keys/values of this dict represent the name of the omic and the corresponding input matrix
training_tsv = {
    'GE': 'rna_dummy.tsv',
    'MIR': 'mir_dummy.tsv',
    'METH': 'meth_dummy.tsv',
}Ēh]ĒhĆ»# The keys/values of this dict represent the name of the omic and the corresponding input matrix
training_tsv = {
    'GE': 'rna_dummy.tsv',
    'MIR': 'mir_dummy.tsv',
    'METH': 'meth_dummy.tsv',
}ĒÖĒĀĒ}Ē(hhhjĺubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆpythonĒjÚjůuh)jŗhjčhhh+h,h*Kubh.)ĀĒ}Ē(hĆ&a survival file must have this format:Ēh]ĒhĆ&a survival file must have this format:ĒÖĒĀĒ}Ē(hĆ&a survival file must have this format:Ēhjőhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*KShjčhhh+h,ubjŠ)ĀĒ}Ē(hĆĒhead survival_dummy.tsv

barcode        days recurrence
sample_test_0  134  1
sample_test_1  291  0
sample_test_2  125  1
sample_test_3  43   0
...
Ēh]ĒhĆĒhead survival_dummy.tsv

barcode        days recurrence
sample_test_0  134  1
sample_test_1  291  0
sample_test_2  125  1
sample_test_3  43   0
...
ĒÖĒĀĒ}Ē(hhhj›ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆbashĒjÚjůuh)jŗhjčhhh+h,h*Kubh.)ĀĒ}Ē(hĆéIn addition, the fields corresponding to the patient IDs, the survival time, and the event should be defined using the survival_flag argument:Ēh]Ē(hĆwIn addition, the fields corresponding to the patient IDs, the survival time, and the event should be defined using the ĒÖĒĀĒ}Ē(hĆwIn addition, the fields corresponding to the patient IDs, the survival time, and the event should be defined using the ĒhjŪhhh+Nh*Nubhų)ĀĒ}Ē(hĆ
survival_flagĒh]ĒhĆ
survival_flagĒÖĒĀĒ}Ē(hhhjŲhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hŲhjŪhhh+h,h*KubhĆ
 argument:ĒÖĒĀĒ}Ē(hĆ
 argument:ĒhjŪhhh+Nh*Nubeh}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Kahjčhhh+h,ubjŠ)ĀĒ}Ē(hĆÜ#Default value
survival_flag = {'patient_id': 'barcode',
                  'survival': 'days',
                 'event': 'recurrence'}Ēh]ĒhĆÜ#Default value
survival_flag = {'patient_id': 'barcode',
                  'survival': 'days',
                 'event': 'recurrence'}ĒÖĒĀĒ}Ē(hhhjubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆpythonĒjÚjůuh)jŗhjčhhh+h,h*Kubeh}Ē(h]ĒĆinput-matricesĒah!]Ēh#]ĒĆinput matricesĒah%]Ēh']Ēuh)h
h*Khhhhh+h,ubh)ĀĒ}Ē(hhh]Ē(h)ĀĒ}Ē(hĆCCreating a simple DeepProg model with one autoencoder for each omicĒh]ĒhĆCCreating a simple DeepProg model with one autoencoder for each omicĒÖĒĀĒ}Ē(hĆCCreating a simple DeepProg model with one autoencoder for each omicĒhj*ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hh*Kjhj'h+h,ubh.)ĀĒ}Ē(hĆŰFirst, we will build a model using the example dataset from ./examples/data/ (These example files are set as default in the config.py file). We will use them to show how to construct a single DeepProg model inferring a autoencoder for each omicĒh]Ē(hĆ<First, we will build a model using the example dataset from ĒÖĒĀĒ}Ē(hĆ<First, we will build a model using the example dataset from Ēhj9hhh+Nh*Nubhų)ĀĒ}Ē(hĆ./examples/data/Ēh]ĒhĆ./examples/data/ĒÖĒĀĒ}Ē(hhhjBhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)hŲhj9hhh+h,h*KubhĆ® (These example files are set as default in the config.py file). We will use them to show how to construct a single DeepProg model inferring a autoencoder for each omicĒÖĒĀĒ}Ē(hĆ® (These example files are set as default in the config.py file). We will use them to show how to construct a single DeepProg model inferring a autoencoder for each omicĒhj9hhh+Nh*Nubeh}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Klhj'hhh+h,ubjŠ)ĀĒ}Ē(hX"
# SimDeep class can be used to build one model with one autoencoder for each omic
from simdeep.simdeep_analysis import SimDeep
from simdeep.extract_data import LoadData

help(SimDeep) # to see all the functions
help(LoadData) # to see all the functions related to loading datasets

# Defining training datasets
from simdeep.config import TRAINING_TSV
from simdeep.config import SURVIVAL_TSV
# Location of the input matrices and survival file
from simdeep.config import PATH_DATA

dataset = LoadData(training_tsv=TRAINING_TSV,
        survival_tsv=SURVIVAL_TSV,
        path_data=PATH_DATA)

# Defining the result path in which will be created an output folder
PATH_RESULTS = "./TEST_DUMMY/"

# instantiate the model with the dummy example training dataset defined in the config file
simDeep = SimDeep(
        dataset=dataset,
        path_results=PATH_RESULTS,
        path_to_save_modelPATH_RESULTS, # This result path can be used to save the autoencoder
        )

simDeep.load_training_dataset() # load the training dataset
simDeep.fit() # fit the modelĒh]ĒhX"
# SimDeep class can be used to build one model with one autoencoder for each omic
from simdeep.simdeep_analysis import SimDeep
from simdeep.extract_data import LoadData

help(SimDeep) # to see all the functions
help(LoadData) # to see all the functions related to loading datasets

# Defining training datasets
from simdeep.config import TRAINING_TSV
from simdeep.config import SURVIVAL_TSV
# Location of the input matrices and survival file
from simdeep.config import PATH_DATA

dataset = LoadData(training_tsv=TRAINING_TSV,
        survival_tsv=SURVIVAL_TSV,
        path_data=PATH_DATA)

# Defining the result path in which will be created an output folder
PATH_RESULTS = "./TEST_DUMMY/"

# instantiate the model with the dummy example training dataset defined in the config file
simDeep = SimDeep(
        dataset=dataset,
        path_results=PATH_RESULTS,
        path_to_save_modelPATH_RESULTS, # This result path can be used to save the autoencoder
        )

simDeep.load_training_dataset() # load the training dataset
simDeep.fit() # fit the modelĒÖĒĀĒ}Ē(hhhj[ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆpythonĒjÚjůuh)jŗhj'hhh+h,h*Kubh.)ĀĒ}Ē(hĆ\At that point, the model is fitted and some output files are available in the output folder:Ēh]ĒhĆ\At that point, the model is fitted and some output files are available in the output folder:ĒÖĒĀĒ}Ē(hĆ\At that point, the model is fitted and some output files are available in the output folder:Ēhjkhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*KŹhj'hhh+h,ubjŠ)ĀĒ}Ē(hĆyTEST_DUMMY
‚Ēú‚ĒÄ‚ĒÄ test_dummy_dataset_KM_plot_training_dataset.png
‚ĒĒ‚ĒÄ‚ĒÄ test_dummy_dataset_training_set_labels.tsvĒh]ĒhĆyTEST_DUMMY
‚Ēú‚ĒÄ‚ĒÄ test_dummy_dataset_KM_plot_training_dataset.png
‚ĒĒ‚ĒÄ‚ĒÄ test_dummy_dataset_training_set_labels.tsvĒÖĒĀĒ}Ē(hhhjzubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆbashĒjÚjůuh)jŗhj'hhh+h,h*Kubh.)ĀĒ}Ē(hĆJThe tsv file contains the label and the label probability for each sample:Ēh]ĒhĆJThe tsv file contains the label and the label probability for each sample:ĒÖĒĀĒ}Ē(hĆJThe tsv file contains the label and the label probability for each sample:Ēhjähhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Kóhj'hhh+h,ubjŠ)ĀĒ}Ē(hĆŲsample_test_0   1       7.22678272919e-12
sample_test_1   1       4.48594196888e-09
sample_test_4   1       1.53363205571e-06
sample_test_5   1       6.72170409655e-08
sample_test_6   0       0.9996581662
sample_test_7   1       3.38139255666e-08Ēh]ĒhĆŲsample_test_0   1       7.22678272919e-12
sample_test_1   1       4.48594196888e-09
sample_test_4   1       1.53363205571e-06
sample_test_5   1       6.72170409655e-08
sample_test_6   0       0.9996581662
sample_test_7   1       3.38139255666e-08ĒÖĒĀĒ}Ē(hhhjôubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆbashĒjÚjůuh)jŗhj'hhh+h,h*Kubh.)ĀĒ}Ē(hĆ;And we also have the visualisation of a Kaplan-Meier Curve:Ēh]ĒhĆ;And we also have the visualisation of a Kaplan-Meier Curve:ĒÖĒĀĒ}Ē(hĆ;And we also have the visualisation of a Kaplan-Meier Curve:Ēhj©hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*KĘhj'hhh+h,ubh.)ĀĒ}Ē(hĆKM plotĒh]Ēh	ĆimageĒďĒ)ĀĒ}Ē(hjļh]ĒhhÖĒĀĒ}Ē(hhhjĺhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆuriĒĆ5./img/test_dummy_dataset_KM_plot_training_dataset.pngĒĆaltĒjļĆ
candidatesĒ}ĒĆ*Ējňsuh)jľhjłhhh+h,h*Kubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*K§hj'hhh+h,ubh.)ĀĒ}Ē(hX$Now we are ready to use a test dataset and to infer the class label for the test samples.
The test dataset do not need to have the same input omic matrices than the training dataset and not even the sample features for a given omic. However, it needs to have at least some features in common.Ēh]Ē(hĆYNow we are ready to use a test dataset and to infer the class label for the test samples.ĒÖĒĀĒ}Ē(hĆYNow we are ready to use a test dataset and to infer the class label for the test samples.Ēhj÷hhh+Nh*NubhĆ
ĒÖĒĀĒ}Ē(hhhj÷hhh+Nh*NubhĆ The test dataset do not need to have the same input omic matrices than the training dataset and not even the sample features for a given omic. However, it needs to have at least some features in common.ĒÖĒĀĒ}Ē(hĆ The test dataset do not need to have the same input omic matrices than the training dataset and not even the sample features for a given omic. However, it needs to have at least some features in common.Ēhj÷hhh+Nh*Nubeh}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*K¶hj'hhh+h,ubjŠ)ĀĒ}Ē(hXŃ# Defining test datasets
from simdeep.config import TEST_TSV
from simdeep.config import SURVIVAL_TSV_TEST

simDeep.load_new_test_dataset(
    TEST_TSV,
    fname_key='dummy'
    SURVIVAL_TSV_TEST, # [OPTIONAL] test survival file useful to compute accuracy of test dataset

    )

# The test set is a dummy rna expression (generated randomly)
print(simDeep.dataset.test_tsv) # Defined in the config file
# The data type of the test set is also defined to match an existing type
print(simDeep.dataset.data_type) # Defined in the config file
simDeep.predict_labels_on_test_dataset() # Perform the classification analysis and label the set dataset

print(simDeep.test_labels)
print(simDeep.test_labels_proba)
Ēh]ĒhXŃ# Defining test datasets
from simdeep.config import TEST_TSV
from simdeep.config import SURVIVAL_TSV_TEST

simDeep.load_new_test_dataset(
    TEST_TSV,
    fname_key='dummy'
    SURVIVAL_TSV_TEST, # [OPTIONAL] test survival file useful to compute accuracy of test dataset

    )

# The test set is a dummy rna expression (generated randomly)
print(simDeep.dataset.test_tsv) # Defined in the config file
# The data type of the test set is also defined to match an existing type
print(simDeep.dataset.data_type) # Defined in the config file
simDeep.predict_labels_on_test_dataset() # Perform the classification analysis and label the set dataset

print(simDeep.test_labels)
print(simDeep.test_labels_proba)
ĒÖĒĀĒ}Ē(hhhjÓubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆpythonĒjÚjůuh)jŗhj'hhh+h,h*Kubh.)ĀĒ}Ē(hĆgThe assigned class and class probabilities for the test samples are now available in the output folder:Ēh]ĒhĆgThe assigned class and class probabilities for the test samples are now available in the output folder:ĒÖĒĀĒ}Ē(hĆgThe assigned class and class probabilities for the test samples are now available in the output folder:ĒhjĢhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*Kņhj'hhh+h,ubjŠ)ĀĒ}Ē(hXTEST_DUMMY
‚Ēú‚ĒÄ‚ĒÄ test_dummy_dataset_dummy_KM_plot_test.png
‚Ēú‚ĒÄ‚ĒÄ test_dummy_dataset_dummy_test_labels.tsv
‚Ēú‚ĒÄ‚ĒÄ test_dummy_dataset_KM_plot_training_dataset.png
‚ĒĒ‚ĒÄ‚ĒÄ test_dummy_dataset_training_set_labels.tsv

head test_dummy_dataset_training_set_labels.tsv


Ēh]ĒhXTEST_DUMMY
‚Ēú‚ĒÄ‚ĒÄ test_dummy_dataset_dummy_KM_plot_test.png
‚Ēú‚ĒÄ‚ĒÄ test_dummy_dataset_dummy_test_labels.tsv
‚Ēú‚ĒÄ‚ĒÄ test_dummy_dataset_KM_plot_training_dataset.png
‚ĒĒ‚ĒÄ‚ĒÄ test_dummy_dataset_training_set_labels.tsv

head test_dummy_dataset_training_set_labels.tsv


ĒÖĒĀĒ}Ē(hhhj
ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆbashĒjÚjůuh)jŗhj'hhh+h,h*Kubh.)ĀĒ}Ē(hĆ7And a KM plot is also constructed using the test labelsĒh]ĒhĆ7And a KM plot is also constructed using the test labelsĒÖĒĀĒ}Ē(hĆ7And a KM plot is also constructed using the test labelsĒhjhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*KŌhj'hhh+h,ubh.)ĀĒ}Ē(hĆKM plot testĒh]ĒjĹ)ĀĒ}Ē(hj.h]ĒhhÖĒĀĒ}Ē(hhhj0hhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēj Ć/./img/test_dummy_dataset_dummy_KM_plot_test.pngĒjŐj.jÕ}ĒjŌj<suh)jľhj,hhh+h,h*Kubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*K—hj'hhh+h,ubh.)ĀĒ}Ē(hĆ0Finally, it is possible to save the keras model:Ēh]ĒhĆ0Finally, it is possible to save the keras model:ĒÖĒĀĒ}Ē(hĆ0Finally, it is possible to save the keras model:ĒhjDhhh+Nh*Nubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']Ēuh)h-h*K”hj'hhh+h,ubjŠ)ĀĒ}Ē(hĆ)simDeep.save_encoders('dummy_encoder.h5')Ēh]ĒhĆ)simDeep.save_encoders('dummy_encoder.h5')ĒÖĒĀĒ}Ē(hhhjSubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆlanguageĒĆpythonĒjÚjůuh)jŗhj'hhh+h,h*Kubeh}Ē(h]ĒĆCcreating-a-simple-deepprog-model-with-one-autoencoder-for-each-omicĒah!]Ēh#]ĒĆCcreating a simple deepprog model with one autoencoder for each omicĒah%]Ēh']Ēuh)h
h*Kjhhhhh+h,ubeh}Ē(h]ĒĆtutorial-simple-deepprog-modelĒah!]Ēh#]ĒĆtutorial: simple deepprog modelĒah%]Ēh']Ēuh)h
h*Khhhhh+h,ubah}Ē(h]Ēh!]Ēh#]Ēh%]Ēh']ĒĆsourceĒh,uh)hĆcurrent_sourceĒNĆcurrent_lineĒNĆsettingsĒĆdocutils.frontendĒĆValuesĒďĒ)ĀĒ}Ē(hNĆ	generatorĒNĆ	datestampĒNĆsource_linkĒNĆ
source_urlĒNĆ
toc_backlinksĒĆentryĒĆfootnote_backlinksĒKĆ
sectnum_xformĒKĆstrip_commentsĒNĆstrip_elements_with_classesĒNĆ
strip_classesĒNĆreport_levelĒKĆ
halt_levelĒKĆexit_status_levelĒKĆdebugĒNĆwarning_streamĒNĆ	tracebackĒąĆinput_encodingĒĆ	utf-8-sigĒĆinput_encoding_error_handlerĒĆstrictĒĆoutput_encodingĒĆutf-8ĒĆoutput_encoding_error_handlerĒjĖĆerror_encodingĒĆutf-8ĒĆerror_encoding_error_handlerĒĆbackslashreplaceĒĆ
language_codeĒĆenĒĆrecord_dependenciesĒNĆconfigĒNĆ	id_prefixĒhĆauto_id_prefixĒĆidĒĆ
dump_settingsĒNĆdump_internalsĒNĆdump_transformsĒNĆdump_pseudo_xmlĒNĆexpose_internalsĒNĆstrict_visitorĒNĆ_disable_configĒNĆ_sourceĒh,Ć_destinationĒNĆ
_config_filesĒ]ĒĆpep_referencesĒNĆpep_base_urlĒĆ https://www.python.org/dev/peps/ĒĆpep_file_url_templateĒĆpep-%04dĒĆrfc_referencesĒNĆrfc_base_urlĒĆhttps://tools.ietf.org/html/ĒĆ	tab_widthĒKĆtrim_footnote_reference_spaceĒČĆfile_insertion_enabledĒąĆraw_enabledĒKĆsyntax_highlightĒĆlongĒĆsmart_quotesĒąĆsmartquotes_localesĒ]ĒĆcharacter_level_inline_markupĒČĆdoctitle_xformĒČĆ
docinfo_xformĒKĆsectsubtitle_xformĒČĆembed_stylesheetĒČĆcloak_email_addressesĒąĆenvĒNubĆreporterĒNĆindirect_targetsĒ]ĒĆsubstitution_defsĒ}ĒĆsubstitution_namesĒ}ĒĆrefnamesĒ}ĒĆrefidsĒ}ĒĆnameidsĒ}Ē(jpjmjąjÖj$j!jhjeuĆ	nametypesĒ}Ē(jpNjąNj$NjhNuh}Ē(jmhjÖhŘj!jčjej'uĆ
footnote_refsĒ}ĒĆ
citation_refsĒ}ĒĆ
autofootnotesĒ]ĒĆautofootnote_refsĒ]ĒĆsymbol_footnotesĒ]ĒĆsymbol_footnote_refsĒ]ĒĆ	footnotesĒ]ĒĆ	citationsĒ]ĒĆautofootnote_startĒKĆsymbol_footnote_startĒKĆ
id_counterĒĆcollectionsĒĆCounterĒďĒ}ĒÖĒRĒĆparse_messagesĒ]ĒĆtransform_messagesĒ]ĒĆtransformerĒNĆ
decorationĒNhhub.