--- saliency_map.lua
-- For each transcription factor (TF) listed in `TFs`, loads the saved models
-- named in `model_names`, and for the selected test sequences writes:
--   * a TRANSFAC-style matrix of the one-hot sequence, rendered with `weblogo`
--   * one saliency CSV per model, rendered with heatmap_saliency.R
-- The intermediate .csv/.txt files are deleted once a TF has been processed.

require 'torch'
require 'nn'
require 'optim'
require 'model'
include('util/auRoc.lua')
require 'lfs'

local cmd = torch.CmdLine()

-- GPU
cmd:option('-gpu', 1) -- set to 0 if no GPU

-- Dataset options
cmd:option('-data_root', 'data') -- data root directory
cmd:option('-dataset', 'deepbind') -- dataset
cmd:option('-seq_length', 101) --length of DNA sequences
cmd:option('-TF', 'ATF1_K562_ATF1_-06-325-_Harvard') -- change for different TF
cmd:option('-alphabet', 'ACGT')
cmd:option('-size', 0) -- how much of each dataset to load. 0 = full
cmd:option('-batch_size', 1)
cmd:option('-class_labels','1,0') --specify positive label first

local opt = cmd:parse(arg)

opt.class_labels_table = opt.class_labels:split(',')
opt.num_classes = #opt.class_labels_table
opt.alphabet_size = #opt.alphabet

local data_dir = opt.data_root..'/'..opt.dataset..'/'

-- Set up GPU stuff
local dtype = 'torch.FloatTensor'
if opt.gpu > 0 then
  collectgarbage()
  require 'cutorch'
  require 'cunn'
  cutorch.setDevice(opt.gpu)
  dtype = 'torch.CudaTensor'
  print(string.format('Running with CUDA on GPU %d', opt.gpu))
else
  print 'Running in CPU mode'
end

-- NOTE: the settings and lookup tables below are intentionally left as
-- globals, exactly as before, because modules loaded from outside this file
-- may read them. Only per-iteration temporaries further down are local.

-- specify directories
model_root = 'models'
data_root = 'data/deepbind/'
viz_dir = 'visualization_results/'

-- ****************************************************************** --
-- ****************** CHANGE THESE FIELDS *************************** --
TFs = {'ATF1_K562_ATF1_-06-325-_Harvard'}
cnn_model_name = 'model=CNN,cnn_size=128,cnn_filters=9-5-3,dropout=0.5,learning_rate=0.01,batch_size=256'
rnn_model_name = 'model=RNN,rnn_size=32,rnn_layers=1,dropout=0.5,learning_rate=0.01,batch_size=256'
cnnrnn_model_name = 'model=CNN-RNN,cnn_size=128,cnn_filter=9,rnn_size=32,rnn_layers=1,dropout=0.5,learning_rate=0.01,batch_size=256'

model_names = {rnn_model_name,cnn_model_name,cnnrnn_model_name}

-- which sequences in the test set to show saliency map for
start_seq = 1
end_seq = start_seq + 0
-- ****************************************************************** --
-- ****************************************************************** --

-- Map alphabet index <-> character.
alphabet = opt.alphabet
rev_dictionary = {}
dictionary = {}
for i = 1,#alphabet do
  rev_dictionary[i] = alphabet:sub(i,i)
  dictionary[alphabet:sub(i,i)] = i
end

-- The global `OneHot` is rebound from the layer constructor to an instance,
-- as in the original script.
OneHot = OneHot(#alphabet):type(dtype)
crit = nn.ClassNLLCriterion():type(dtype)

for _,TF in ipairs(TFs) do
  print(TF)

  -- Point the dataset options at the TF whose models are loaded below, so
  -- the test data and the models always refer to the same TF.
  opt.TF = TF
  opt.data_dir = data_dir..opt.TF

  local save_path = viz_dir..TF..'/'
  -- -p also creates viz_dir itself when it does not exist yet; without it the
  -- mkdir fails silently and the first io.open below returns nil.
  os.execute('mkdir -p '..save_path..' > /dev/null 2>&1')
  -- os.execute('rm '..save_path..'/*.csv > /dev/null 2>&1')
  -- os.execute('rm '..save_path..'*.png > /dev/null 2>&1')
  require('data')
  data = {}
  local test_seqs = createDatasetOneHot("test", opt)

  -- Load Models
  local models = {}
  for _,model_name in ipairs(model_names) do
    local load_path = model_root..'/'..model_name..'/'..TF..'/'
    print(load_path)
    local model = torch.load(load_path..'best_model.t7')
    model:evaluate()
    model.model:type(dtype)
    -- Strip the first layer exactly once per loaded model. The one-hot
    -- encoding is applied outside the network (see X below) so gradients can
    -- be taken w.r.t. the one-hot input; presumably layer 1 is the model's
    -- own one-hot layer. Doing this inside the sequence loop would remove an
    -- additional layer for every extra test sequence.
    model.model:remove(1)

    models[model_name] = model
  end

  for t = start_seq,end_seq do
    print('test sequence number '..t)
    local x = test_seqs.inputs[t]:type(dtype)
    local X = OneHot:forward(x)
    local y = test_seqs.labels[t]:type(dtype)

    --####################### CREATE SEQ LOGO ###############################--
    local s2l_filename = save_path..'sequence_'..tostring(t)..'.txt'
    local f = assert(io.open(s2l_filename, 'w'))
    print(s2l_filename)
    -- Header row: "PO" followed by one column per alphabet symbol.
    f:write('PO ')
    for c in alphabet:gmatch('.') do
      f:write(c..' ')
    end
    f:write('\n')
    -- One row per position: position index followed by the one-hot values.
    for i = 1,X[1]:size(1) do
      f:write(tostring(i)..' ')
      for j = 1,X[1]:size(2) do
        f:write(tostring(X[1][i][j])..' ')
      end
      f:write('\n')
    end
    f:close()
    -- Stacks per line (-n) follows the configured sequence length.
    local weblogo_cmd = "weblogo -D transfac -F png -o "..save_path.."sequence_"..t..".png --errorbars NO --show-xaxis NO --show-yaxis NO -A dna --composition none -n "..opt.seq_length.." --color '#00CC00' 'A' 'A' --color '#0000CC' 'C' 'C' --color '#FFB300' 'G' 'G' --color '#CC0000' 'T' 'T' < "..s2l_filename
    os.execute(weblogo_cmd)

    -- Iterate in the order given by model_names so runs are deterministic.
    for _,model_name in ipairs(model_names) do
      local model = models[model_name]
      print(model_name)
      local csv_path = save_path..model_name..'_saliency'..t..'.csv'
      local png_path = save_path..model_name..'_saliency'..t..'.png'
      local out_file = assert(io.open(csv_path, 'w'))

      model:resetStates()
      model:zeroGradParameters()

      ------------------------SALIENCY------------------------
      local output = model:forward(X)
      -- The criterion's forward pass is run before its backward pass.
      crit:forward(output, y)
      local df_do = crit:backward(output, y)
      -- Saliency = |d loss / d input|, masked by the one-hot input so only
      -- the symbol actually present at each position keeps a non-zero value.
      local inputGrads = model:backward(X, df_do)
      inputGrads = torch.abs(inputGrads)
      inputGrads = torch.cmul(inputGrads,X)
      -- inputGrads = inputGrads:max(2)
      inputGrads = inputGrads:view(opt.seq_length,opt.alphabet_size)
      -- exp() is applied in place on the first output row; the first entry is
      -- reported as the score (presumably the positive-class probability,
      -- given that ClassNLLCriterion expects log-probabilities).
      local score = output[1]:exp()[1]

      print(model_name..': '..tostring(score))

      -- write to output file: symbol, then one saliency value per alphabet
      -- column, for every sequence position
      for i = 1,inputGrads:size(1) do
        out_file:write(rev_dictionary[x[1][i]]..',')
        for j = 1,inputGrads:size(2) do
          out_file:write(inputGrads[i][j]..',')
        end
        out_file:write('\n')
      end

      out_file:write('\n')
      out_file:close()

      ---------------- Create visualization----------
      local heatmap_cmd = 'Rscript ./heatmap_scripts/heatmap_saliency.R '..csv_path..' '..png_path..' -25'
      os.execute(heatmap_cmd..' > /dev/null 2>&1')

    end -- loop through models

    collectgarbage()
  end -- test sequences

  print('')
  print(lfs.currentdir()..'/'..save_path)
  -- Remove the intermediate files; only the rendered images are kept.
  os.execute('rm '..save_path..'/*.csv > /dev/null 2>&1')
  os.execute('rm '..save_path..'/*.txt > /dev/null 2>&1')

end -- TFs