|
a |
|
b/deepdta-toy/arguments.py |
|
|
1 |
import argparse |
|
|
2 |
import os |
|
|
3 |
|
|
|
4 |
|
|
|
5 |
|
|
|
6 |
def argparser():
    """Build the command-line parser and return the parsed flags.

    Arguments are read from ``sys.argv`` with ``parse_known_args``, so
    options this parser does not define are ignored rather than rejected.

    Returns:
        argparse.Namespace: one attribute per flag defined below. The
        list-valued flags (``seq_window_lengths``, ``smi_window_lengths``,
        ``num_windows``) are ``None`` when omitted, because they declare no
        default.
    """
    parser = argparse.ArgumentParser()

    # for model
    parser.add_argument(
        '--seq_window_lengths',
        type=int,
        nargs='+',
        help='Space separated list of motif filter lengths. (ex, --seq_window_lengths 4 8 12)'
    )
    parser.add_argument(
        '--smi_window_lengths',
        type=int,
        nargs='+',
        help='Space separated list of motif filter lengths. (ex, --smi_window_lengths 4 8 12)'
    )
    parser.add_argument(
        '--num_windows',
        type=int,
        nargs='+',
        help='Space separated list of the number of motif filters corresponding to length list. (ex, --num_windows 100 200 100)'
    )
    parser.add_argument(
        '--num_hidden',
        type=int,
        default=0,
        help='Number of neurons in hidden layer.'
    )
    parser.add_argument(
        '--num_classes',
        type=int,
        default=0,
        help='Number of classes (families).'
    )
    parser.add_argument(
        '--max_seq_len',
        type=int,
        default=0,
        help='Length of input sequences.'
    )
    # NOTE(review): this help text is identical to --max_seq_len's;
    # presumably it should describe the "smi" input instead -- confirm.
    parser.add_argument(
        '--max_smi_len',
        type=int,
        default=0,
        help='Length of input sequences.'
    )

    # for learning
    parser.add_argument(
        '--learning_rate',
        type=float,
        default=0.001,
        help='Initial learning rate.'
    )
    parser.add_argument(
        '--num_epoch',
        type=int,
        default=100,
        help='Number of epochs to train.'
    )
    parser.add_argument(
        '--batch_size',
        type=int,
        default=256,
        help='Batch size. Must divide evenly into the dataset sizes.'
    )
    parser.add_argument(
        '--train_path',
        type=str,
        default='/data/DTC/',
        help='Directory for input data.'
    )
    parser.add_argument(
        '--test_path',
        type=str,
        default='',
        help='Directory for input data.'
    )
    parser.add_argument(
        '--problem_type',
        type=int,
        default=1,
        help='Type of the prediction problem (1-4)'
    )
    parser.add_argument(
        '--isLog',
        type=int,
        default=0,
        help='Convert the values to log10^9'
    )
    parser.add_argument(
        '--binary_th',
        type=float,
        default=0.0,
        help='Threshold to split data into binary classes'
    )

    parser.add_argument(
        '--checkpoint_path',
        type=str,
        default='',
        help='Path to write checkpoint file.'
    )
    parser.add_argument(
        '--log_dir',
        type=str,
        default='/tmp',
        help='Directory for log data.'
    )

    # Unrecognised arguments are deliberately discarded (parse_known_args).
    FLAGS, _ = parser.parse_known_args()

    # NOTE(review): no cross-flag validation is performed. An earlier,
    # disabled check compared the length of `FLAGS.window_lengths` with
    # `FLAGS.num_windows`, but no flag named `window_lengths` is defined.

    return FLAGS
|
|
122 |
|
|
|
123 |
|
|
|
124 |
|
|
|
125 |
|
|
|
126 |
def logging(msg, FLAGS):
    """Append ``msg`` as one line to ``log.txt`` inside ``FLAGS.log_dir``.

    Args:
        msg: value to record; it is formatted with ``%s``.
        FLAGS: object exposing a ``log_dir`` attribute (the directory that
            holds ``log.txt``).
    """
    log_dir = FLAGS.log_dir
    # Create the directory on demand so the first log call cannot fail just
    # because the directory is missing. An empty log_dir means the current
    # directory, which os.makedirs would reject, so it is skipped.
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    fpath = os.path.join(log_dir, "log.txt")
    with open(fpath, "a") as fw:
        # Wrap msg in a 1-tuple: a bare tuple msg would otherwise be
        # unpacked as the %-format arguments and raise TypeError.
        fw.write("%s\n" % (msg,))