DeepProg / Git / [53737a] /examples/example_hyperparameters_tuning_with_test

Models:
AlyssaS/
DeepProg
Downloads: 1
[53737a]: / examples / example_hyperparameters_tuning_with_test_dataset.py
History
Download this file
88 lines (67 with data), 2.3 kB

"""
This example details how to optimize the choice of the hyperparameters to cluster
a multi-omic dataset using a reference test dataset as objective.

Multiple objective criteria can be used, such as the final Cox-PH p-value of the
test model, cluster consistency, the c-index for out-of-bag samples or for the
full labels, a mix score, or the sum of the p-values.

"""

from os.path import abspath
from os.path import split

from simdeep.simdeep_tuning import SimDeepTuning

import ray


def test_instance():
    """
    Example of hyperparameter tuning with SimDeepTuning and a test dataset.

    Builds a SimDeepTuning instance from the input files located under
    PATH_DATA, starts ray, runs the tuning with the ``log_test_pval`` metric,
    then retrieves and saves the results table. Ray is shut down in all cases,
    including when the tuning raises, so that a later ``ray.init()`` in the
    same process is not blocked by a leftover runtime.
    """
    PATH_DATA = '{0}/../examples/data/'.format(split(abspath(__file__))[0])

    # Input file. We will only cluster on the RNA features, see below
    TRAINING_TSV = {'RNA': 'rna_dummy.tsv', 'METH': 'meth_dummy.tsv'}
    SURVIVAL_TSV = 'survival_dummy.tsv'

    PROJECT_NAME = 'TestProjectTuning'

    # We will use the methylation value as test dataset
    test_datasets = {
        'testdataset1': ({'METH': 'meth_dummy.tsv'}, 'survival_dummy.tsv')
    }

    # AgglomerativeClustering is an external class that can be used as
    # a clustering algorithm since it has a fit_predict method
    from sklearn.cluster import AgglomerativeClustering

    # Candidate values for each hyperparameter explored by the tuning
    args_to_optimize = {
        'seed': [
            100, 200, 300, 400,
        ],
        'nb_clusters': [2, 5],
        'cluster_method': [
            'mixture',
            AgglomerativeClustering
        ],
    }

    tuning = SimDeepTuning(
        args_to_optimize=args_to_optimize,
        test_datasets=test_datasets,
        survival_tsv=SURVIVAL_TSV,
        training_tsv=TRAINING_TSV,
        path_data=PATH_DATA,
        project_name=PROJECT_NAME,
        path_results=PATH_DATA,
        clustering_omics=['RNA'], # Only cluster on RNA
    )

    ray.init()

    # Possible metrics for test set: {
    #         "log_test_pval",
    #         "test_cindex",
    #         "test_consisentcy",
    #         "sum_log_pval",
    #     }
    # NOTE(review): "test_consisentcy" is kept as originally spelled; confirm
    # against the metric names accepted by SimDeepTuning before changing it.

    try:
        tuning.fit(
            metric='log_test_pval',
            num_samples=10,
            distribute_deepprog=True,
            max_concurrent=2,
            # iterations is useful to take into account the DL parameter
            # fitting variations
            iterations=1,
        )

        # The table is bound to a local only to show how to retrieve it;
        # it is not used further in this example.
        table = tuning.get_results_table()
        tuning.save_results_table()
    finally:
        # Always release the ray runtime, even if the tuning failed
        ray.shutdown()


# Run the example only when this file is executed as a script, not on import.
if __name__ == '__main__':
    test_instance()