Switch to unified view

a b/Tuning_hyperparameters/DNNGP_OPN.py
1
# DNNGP3 tuning hyperparameters script
2
import os
3
import re
4
import json
5
import subprocess
6
import numpy as np
7
import nevergrad as ng
8
import tensorflow as tf
9
# This script has three places you may need to adjust: the directory and file paths defined just below, the hyperparameter search space (`instr`), and the DNNGP command line built inside `objective`.
# They are listed in descending order of priority; apart from the directories, the default settings are sufficient for most use cases.
11
# Define directories and file paths.
# The paths are assembled with os.path.join so that they resolve on both
# Windows and POSIX systems (a hard-coded backslash is not a path separator
# on POSIX, which makes os.path.dirname() return an empty string there).
output_dir = os.path.join('..', 'Output_files')
pkl_file = os.path.join('..', 'Input_files', 'wheat599_pc95.pkl')
budget = 200  # Number of optimizer iterations per tsv file
alpha = 0.7  # Weight balancing the mean against the variance term in the optimization metric
beta = 0.1  # Controls the nonlinear (exponential) effect of the variance in the optimization metric
cvs = 10  # Number of folds for K-fold cross-validation

pkl_dir = os.path.dirname(pkl_file)
# Collect all tsv files located in the same directory as the pkl file.
# Sorted so the processing order does not depend on the file system's listing order.
tsv_files = sorted(f for f in os.listdir(pkl_dir) if f.endswith('.tsv'))
22
23
def check_gpu_available():
    """Print a short report on the GPUs visible to TensorFlow.

    Returns:
        True when at least one physical GPU device is listed, False otherwise.
    """
    devices = tf.config.list_physical_devices('GPU')

    # Guard clause: nothing to enumerate when no GPU is present.
    if not devices:
        print("⚠️ No GPU detected, will use CPU")
        return False

    print("🎉 GPU is available!")
    for index, device in enumerate(devices):
        print(f"[Device {index}]")
        print(f"  Name: {device.name}")
        try:
            info = tf.config.experimental.get_device_details(device)
            capability = info.get('compute_capability')
            print(f"  Compute Capability: {capability}")
            device_type = info.get('device_type', 'N/A')
            print(f"  Device Type: {device_type}")
        except AttributeError:
            # The details lookup is not available; report it and move on.
            print("  Unable to retrieve detailed device information (may require TensorFlow version upgrade)")
    return True
41
# Report GPU availability once at start-up
check_gpu_available()

# Hyperparameter search space (see https://github.com/facebookresearch/nevergrad).
# Each entry maps a keyword argument of `objective` to its nevergrad parameter:
# bounded scalars cast to integers, and `Log` parameters for the rest.
_search_space = {
    'batch_size': ng.p.Scalar(lower=32, upper=1024).set_integer_casting(),
    'lr': ng.p.Log(lower=1e-4, upper=1),
    'patience': ng.p.Scalar(lower=10, upper=50).set_integer_casting(),
    'dropout1': ng.p.Log(lower=0.01, upper=0.9),
    'dropout2': ng.p.Log(lower=0.01, upper=0.9),
    'earlystopping': ng.p.Scalar(lower=50, upper=100).set_integer_casting(),
}
instr = ng.p.Instrumentation(**_search_space)
51
52
# Define a function to extract statistic values
53
54
def extract_statistics(output):
    """Extract the first ``statistic=<number>`` value from a text blob.

    Accepts plain decimals (``statistic=0.51``), scientific notation
    (``statistic=1.5e-05``) and values wrapped in a NumPy scalar repr
    (``statistic=np.float64(0.51)``), which is how NumPy >= 2 prints scalars.

    Args:
        output: Text to search, e.g. the captured standard output of a run.

    Returns:
        The first matching value as a float, or 0.0 when no numeric
        ``statistic=`` entry is present.
    """
    match = re.search(
        r'statistic=(?:np\.float\d+\()?'  # optional NumPy >= 2 scalar wrapper
        r'([-+]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][-+]?[0-9]+)?)',
        output,
    )
    if match is None:
        return 0.0
    return float(match.group(1))
60
61
# Define the objective function
62
63
64
def objective(batch_size: int, lr: float, patience: int, dropout1: float, dropout2: float, earlystopping: int, tsv_file: str):
    """Score one hyperparameter set by running every cross-validation fold.

    For each fold ``part`` in ``1..cvs`` the DNNGP runner script is launched as
    a child process. The statistic parsed from its standard output by
    ``extract_statistics`` is taken as that fold's accuracy (0.0 when none is
    found or when the process cannot be started).

    Args:
        batch_size, lr, patience, dropout1, dropout2, earlystopping:
            Forwarded unchanged to the runner's command-line options of the
            same name.
        tsv_file: Name of the phenotype file, resolved relative to ``pkl_dir``.

    Returns:
        The negated combined metric
        ``alpha * mean - (1 - alpha) * exp(beta * variance)`` computed over
        the fold accuracies. It is negated because the optimizer minimizes.
    """
    print('batch:',batch_size, 'lr:', lr, 'patience:', patience, 'dropout1:', dropout1, 'dropout2:', dropout2, 'earlystopping:', earlystopping, 'tsv_file:', tsv_file)

    pheno_file = os.path.join(pkl_dir, tsv_file)
    accuracies = []

    for part in range(1, cvs + 1):
        # Build the command as an argument list and run it without a shell, so
        # that paths or file names containing spaces or shell metacharacters
        # are passed through verbatim instead of being re-parsed.
        command = [
            "python", "../Scripts/dnngp_runner.py",
            "--batch_size", str(batch_size),
            "--epoch", "10000",
            "--lr", str(lr),
            "--patience", str(patience),
            "--dropout1", str(dropout1),
            "--dropout2", str(dropout2),
            "--earlystopping", str(earlystopping),
            "--cv", str(cvs),
            "--part", str(part),
            "--snp", pkl_file,
            "--pheno", pheno_file,
            "--output", output_dir,
        ]
        print(" ".join(command))

        try:
            completed = subprocess.run(
                command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except OSError as exc:
            # The process could not be started; keep the best-effort
            # behaviour and count this fold as 0.0.
            print("Error Output:", exc)
            accuracies.append(0.0)
            continue

        # Decode output
        output_str = completed.stdout.decode(errors='ignore')
        error_str = completed.stderr.decode(errors='ignore')

        if error_str:
            print("Error Output:", error_str)
        if completed.returncode != 0:
            print(f"Warning: fold {part} exited with return code {completed.returncode}")

        accuracies.append(extract_statistics(output_str))
    print("Statistic values for all folds", accuracies)

    mean_accuracy = np.mean(accuracies) if accuracies else 0.0
    var_accuracy = np.var(accuracies) if accuracies else 0.0

    # Use a weighted combination to balance the mean and variance;
    # the variance enters through a nonlinear (exponential) transform.
    combined_metric = alpha * mean_accuracy - \
        (1 - alpha) * np.exp(beta * var_accuracy)
    return -combined_metric
95
96
97
# Best parameter set found for each tsv file, keyed by file name
best_params_per_tsv = {}


def _objective_for(tsv_name):
    """Return a callable that forwards its arguments to objective() for tsv_name."""
    def _evaluate(*args, **kwargs):
        return objective(*args, **kwargs, tsv_file=tsv_name)
    return _evaluate


for tsv_file in tsv_files:
    print(f"Optimizing for TSV file: {tsv_file}")
    # A new NGOpt optimizer is created for every tsv file
    optimizer = ng.optimizers.NGOpt(parametrization=instr, budget=budget)
    # Run the optimization and keep the recommended parameter values
    recommendation = optimizer.minimize(_objective_for(tsv_file))
    best_values = recommendation.value
    print(f"Best parameters for {tsv_file}:", best_values)
    best_params_per_tsv[tsv_file] = best_values

# Write the collected results as JSON next to the input files
output_json_file = os.path.join(pkl_dir, 'best_params_per_tsv.json')
with open(output_json_file, 'w') as json_handle:
    json.dump(best_params_per_tsv, json_handle, indent=4)
print(f"Best parameters saved to {output_json_file}")