[214c6e]: / 01_jund_prediction / dna_only.py

Download this file

75 lines (56 with data), 2.3 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from itertools import product
import argparse
import os
import numpy as np
import matplotlib
import pandas as pd
from model_utils import objective
# Select matplotlib's non-interactive Agg backend.
matplotlib.use('Agg')

# Keep a CUDA_VISIBLE_DEVICES value supplied by the caller; otherwise use "2".
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "2")

# Fixed seed for NumPy's global random number generator.
np.random.seed(1234)

# Command-line interface: where inputs live and where outputs are written.
PARSER = argparse.ArgumentParser(description='DNA model.')
PARSER.add_argument(
    '-inputpath',
    dest='inpath',
    default='../data',
    help='Location of input files',
)
PARSER.add_argument(
    '-path',
    dest='path',
    default='../jund_results',
    help="Output directory for the examples.",
)
args = PARSER.parse_args()

inpath = args.inpath
# Publish the output directory through the JANGGU_OUTPUT environment variable
# (presumably consumed by janggu / model_utils -- confirm there).
os.environ['JANGGU_OUTPUT'] = args.path
# Exhaustive grid search: 3 motif orders x 2 sequence-dropout rates x
# 5 replicates = 30 calls to objective(); one result row is kept per call.
print("#" * 20)
# NOTE(review): the banner mentions single vs. both strands, but 'stranded'
# is fixed to 'double' below; only 'order' and 'seq_dropout' are varied.
print("Test effect of scanning single or both strands and higher-order motifs")

# Column-wise accumulator; each key becomes one column of the output table.
results = {'auprc_val': [], 'auprc_test': [], 'dropout': [], 'order': [],
           'strand': [], 'modelname': []}

# Identifier embedded in every model name and in the result file name.
run = 5

# Make sure the output directory exists *before* training starts, so that a
# missing or mistyped path cannot discard the results of all runs at the
# final write. An empty path is left alone (the file then lands in the
# current working directory, as before).
output_dir = os.environ['JANGGU_OUTPUT']
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

for order, sdrop, rep in product([3, 2, 1], [0.0, 0.2], [1, 2, 3, 4, 5]):
    # Configuration handed to objective(); only 'name', 'seq_dropout' and
    # 'order' change between iterations.
    shared_space = {
        'type': 'dna_only',
        'name': 'dna_o{}_d{}_run_{}_{}'.format(order, sdrop, run, rep),
        'binsize': 200,
        'epochs': 30,
        'seq_dropout': sdrop,
        'dnaflank': 150,
        'dnaseflank': 0,
        'order': order,
        'stranded': 'double',
        'nmotifs1': 10,
        'motiflen': 11,
        'pool1': 30,
        'stride': 1,
        'shift_range': 0,
        'nmotifs2': 8,
        'hypermotiflen': 3,
        'opt': 'amsgrad',
    }
    print(shared_space['name'])

    # objective() is expected to return a mapping that provides at least
    # 'auprc_val', 'auprc_test' and 'modelname' (the keys read below).
    res = objective(shared_space)

    results['auprc_val'].append(res['auprc_val'])
    results['auprc_test'].append(res['auprc_test'])
    results['dropout'].append(sdrop)
    results['order'].append(order)
    results['strand'].append(shared_space['stranded'])
    results['modelname'].append(res['modelname'])

# Write all collected rows as a tab-separated table into the output directory.
df = pd.DataFrame(results)
df.to_csv(os.path.join(output_dir, "dna_gridsearch_{}.tsv".format(run)),
          sep='\t')