|
a |
|
b/network/CommGeneticsLoader.java |
|
|
1 |
package network; |
|
|
2 |
|
|
|
3 |
import java.io.BufferedReader; |
|
|
4 |
import java.io.BufferedWriter; |
|
|
5 |
import java.io.File; |
|
|
6 |
import java.io.FileReader; |
|
|
7 |
import java.io.FileWriter; |
|
|
8 |
import java.io.IOException; |
|
|
9 |
import java.util.ArrayList; |
|
|
10 |
import java.util.Arrays; |
|
|
11 |
import java.util.Collections; |
|
|
12 |
import java.util.Comparator; |
|
|
13 |
import java.util.HashMap; |
|
|
14 |
import java.util.HashSet; |
|
|
15 |
import java.util.LinkedHashMap; |
|
|
16 |
import java.util.LinkedList; |
|
|
17 |
import java.util.List; |
|
|
18 |
import java.util.Map; |
|
|
19 |
import java.util.Set; |
|
|
20 |
|
|
|
21 |
import GraphAlgorithm.ComSearch; |
|
|
22 |
import util.CollectionsEx; |
|
|
23 |
import util.DCNOMIMUMLSIDmap; |
|
|
24 |
|
|
|
25 |
/**
 * Provides methods for creating a bipartite network and writing it to a network file.
 * @author zhengc
 *
 */
|
|
30 |
public class CommGeneticsLoader { |
|
|
31 |
public static HashSet<String> entry_set = new HashSet<String>(); |
|
|
32 |
public static HashSet<String> disease_set = new HashSet<String>(); |
|
|
33 |
public static HashSet<String> gene_set = new HashSet<String>(); |
|
|
34 |
public static HashSet<String> DMN_set = new HashSet<String>(); |
|
|
35 |
public static HashSet<String> DCN_set = new HashSet<String>(); |
|
|
36 |
|
|
|
37 |
public static List<String> entry_list = new ArrayList<String>(); |
|
|
38 |
public static List<String> disease_list = new ArrayList<String>(); |
|
|
39 |
public static List<String> gene_list = new ArrayList<String>(); |
|
|
40 |
public static List<String> DMN_list = new ArrayList<String>(); |
|
|
41 |
public static List<String> DCN_list = new ArrayList<String>(); |
|
|
42 |
|
|
|
43 |
public static int num_gene = 0; |
|
|
44 |
public static int num_dmn; |
|
|
45 |
public static int num_dcn; |
|
|
46 |
public static int num_disease; |
|
|
47 |
|
|
|
48 |
public static Map<Integer, Integer> dis_gene = new HashMap<Integer, Integer>(); |
|
|
49 |
|
|
|
50 |
|
|
|
51 |
public static Map<String, Integer> entry_index = new HashMap<String, Integer>(); |
|
|
52 |
public static Map<Integer, String> index_entry = new HashMap<Integer, String>(); |
|
|
53 |
|
|
|
54 |
public static Map<String, String> idnamemap = new HashMap<String, String>(); |
|
|
55 |
public static Map<String, String> nameidmap = new HashMap<String, String>(); |
|
|
56 |
public static Map<String, String> OMIMidnamemap = new HashMap<String, String>(); |
|
|
57 |
public static Map<String, String> OMIMnameidmap = new HashMap<String, String>(); |
|
|
58 |
|
|
|
59 |
public static HashSet<String> soc_set = new HashSet<String>(); |
|
|
60 |
public static Map<String, String> umlssocmap = new HashMap<String, String>(); |
|
|
61 |
public static Map<String, Integer> soc_idx = new HashMap<String, Integer>(); |
|
|
62 |
public static Map<Integer, String> idx_soc = new HashMap<Integer,String>(); |
|
|
63 |
public static final double P = 0.85; |
|
|
64 |
|
|
|
65 |
/** |
|
|
66 |
* Read DCN net and get DCN disease list |
|
|
67 |
* @param filename DCN net file |
|
|
68 |
* @throws IOException |
|
|
69 |
*/ |
|
|
70 |
public static void readCommNet(String filename) throws IOException{ |
|
|
71 |
// get all unique disease set |
|
|
72 |
BufferedReader br = new BufferedReader(new FileReader(new File(filename))); |
|
|
73 |
String line = br.readLine(); |
|
|
74 |
while ((line = br.readLine()) != null) { |
|
|
75 |
String disease = line.split("\\|")[0]; |
|
|
76 |
DCN_set.add(disease); |
|
|
77 |
} |
|
|
78 |
br.close(); |
|
|
79 |
DCN_list = util.CollectionsEx.setToList(DCN_set); |
|
|
80 |
num_dcn = DCN_set.size(); |
|
|
81 |
System.out.println("number of DCN: " + num_dcn); |
|
|
82 |
|
|
|
83 |
} |
|
|
84 |
|
|
|
85 |
/** |
|
|
86 |
* Write a map from DCN disease UMLS to disease name |
|
|
87 |
* @param imput map file from disease UMLS to name |
|
|
88 |
* @param output a file to be written |
|
|
89 |
* @throws IOException |
|
|
90 |
*/ |
|
|
91 |
public static void writeDCNIdName(String imput, String output) throws IOException { |
|
|
92 |
Map<String, String> idnamemap = new HashMap<String, String>(); |
|
|
93 |
|
|
|
94 |
BufferedReader br = new BufferedReader(new FileReader(new File(imput))); |
|
|
95 |
String line = br.readLine(); |
|
|
96 |
while ((line = br.readLine()) != null) { |
|
|
97 |
String disease_id = line.split("\\|")[0]; |
|
|
98 |
String disease_name = line.split("\\|")[1]; |
|
|
99 |
if(!idnamemap.keySet().contains(disease_id)) { |
|
|
100 |
idnamemap.put(disease_id, disease_name); |
|
|
101 |
} |
|
|
102 |
} |
|
|
103 |
br.close(); |
|
|
104 |
|
|
|
105 |
BufferedWriter bw = new BufferedWriter(new FileWriter(new File(output))); |
|
|
106 |
Set<String> ids = idnamemap.keySet(); |
|
|
107 |
List<String> id_list = util.CollectionsEx.setToList(ids); |
|
|
108 |
Collections.sort(id_list); |
|
|
109 |
|
|
|
110 |
for(String id : id_list) { |
|
|
111 |
String name = idnamemap.get(id).replace("-- ", ""); |
|
|
112 |
bw.write(id + "|" + name + "\n"); |
|
|
113 |
} |
|
|
114 |
bw.close(); |
|
|
115 |
} |
|
|
116 |
|
|
|
117 |
/** |
|
|
118 |
* Reads DMN to get DMN disease list |
|
|
119 |
* @param filename DMN net file |
|
|
120 |
* @throws IOException |
|
|
121 |
*/ |
|
|
122 |
public static void readDMN(String filename) throws IOException{ |
|
|
123 |
// get all unique disease set |
|
|
124 |
BufferedReader br = new BufferedReader(new FileReader(new File(filename))); |
|
|
125 |
String line = br.readLine(); |
|
|
126 |
while ((line = br.readLine()) != null) { |
|
|
127 |
String disease1 = line.split("\\|")[0]; |
|
|
128 |
DMN_set.add(disease1); |
|
|
129 |
String disease2 = line.split("\\|")[1]; |
|
|
130 |
DMN_set.add(disease2); |
|
|
131 |
} |
|
|
132 |
br.close(); |
|
|
133 |
DMN_list = util.CollectionsEx.setToList(DMN_set); |
|
|
134 |
num_dmn = DMN_set.size(); |
|
|
135 |
System.out.println("number of DMN: " + num_dmn); |
|
|
136 |
|
|
|
137 |
} |
|
|
138 |
|
|
|
139 |
/** |
|
|
140 |
* Reads PPI to get gene list |
|
|
141 |
* @param filename |
|
|
142 |
* @throws IOException |
|
|
143 |
*/ |
|
|
144 |
public static void readPPI(String filename) throws IOException{ |
|
|
145 |
BufferedReader br = new BufferedReader(new FileReader(new File(filename))); |
|
|
146 |
String line = null; |
|
|
147 |
while ((line=br.readLine()) != null) { |
|
|
148 |
String gene = line.split("\\|")[0]; |
|
|
149 |
// System.out.println(2); |
|
|
150 |
gene_set.add(gene); |
|
|
151 |
} |
|
|
152 |
br.close(); |
|
|
153 |
gene_list = util.CollectionsEx.setToList(gene_set); |
|
|
154 |
num_gene = gene_set.size(); |
|
|
155 |
System.out.println("number of gene: " + num_gene); |
|
|
156 |
} |
|
|
157 |
|
|
|
158 |
/** |
|
|
159 |
* Reads UMLS-SOC map file to get UMLS-SOC map |
|
|
160 |
* @param filename |
|
|
161 |
* @throws IOException |
|
|
162 |
*/ |
|
|
163 |
public static void readUMLSSOC(String filename) throws IOException{ |
|
|
164 |
BufferedReader br = new BufferedReader(new FileReader(new File(filename))); |
|
|
165 |
String line = null; |
|
|
166 |
while ((line=br.readLine()) != null) { |
|
|
167 |
String[] parts = line.split("\\$"); |
|
|
168 |
String umls = parts[0]; |
|
|
169 |
String soc = parts[8]; |
|
|
170 |
soc_set.add(soc); |
|
|
171 |
umlssocmap.put(umls, soc); |
|
|
172 |
} |
|
|
173 |
br.close(); |
|
|
174 |
|
|
|
175 |
int m = 0; |
|
|
176 |
for (String soc : soc_set) { |
|
|
177 |
soc_idx.put(soc, m); |
|
|
178 |
idx_soc.put(m, soc); |
|
|
179 |
m++; |
|
|
180 |
} |
|
|
181 |
|
|
|
182 |
} |
|
|
183 |
|
|
|
184 |
|
|
|
185 |
|
|
|
186 |
|
|
|
187 |
/** |
|
|
188 |
* Creates entry index for all nodes |
|
|
189 |
* @param entry_list |
|
|
190 |
*/ |
|
|
191 |
public static void createIndex(List<String> entry_list) { |
|
|
192 |
int m = 0; |
|
|
193 |
for (String e : entry_list) { |
|
|
194 |
if (!entry_index.containsKey(e)) { |
|
|
195 |
entry_index.put(e, m); |
|
|
196 |
index_entry.put(m, e); |
|
|
197 |
m++; |
|
|
198 |
} |
|
|
199 |
} |
|
|
200 |
System.out.println("Total nodes: " + entry_index.size()); |
|
|
201 |
System.out.println("--------"); |
|
|
202 |
} |
|
|
203 |
|
|
|
204 |
/** |
|
|
205 |
* Build a bipartite graph including DCN and PPI, additional DCN-OMIM disease mapping also included |
|
|
206 |
* @param commnetfile DCN file |
|
|
207 |
* @param ppifile PPI file |
|
|
208 |
* @param disgenefile disease genetics file from OMIM |
|
|
209 |
* @param dcn_omim mapping from DCN disease to OMIM disease |
|
|
210 |
* @return a bipartite graph |
|
|
211 |
* @throws IOException |
|
|
212 |
*/ |
|
|
213 |
|
|
|
214 |
|
|
|
215 |
public static DisGraph createCommGeneticsGraph(String commnetfile, |
|
|
216 |
String ppifile, String disgenefile, Map<String, List<String>> dcn_omim) throws IOException { |
|
|
217 |
|
|
|
218 |
readCommNet(commnetfile); |
|
|
219 |
readPPI(ppifile); |
|
|
220 |
disease_set.addAll(DCN_set); |
|
|
221 |
disease_list = util.CollectionsEx.setToList(disease_set); |
|
|
222 |
entry_list.addAll(disease_list); |
|
|
223 |
|
|
|
224 |
entry_list.addAll(gene_list); |
|
|
225 |
createIndex(entry_list); |
|
|
226 |
num_disease = disease_list.size(); |
|
|
227 |
|
|
|
228 |
int n = entry_index.size(); |
|
|
229 |
DisGraph cgg = new DisGraph(n); |
|
|
230 |
|
|
|
231 |
|
|
|
232 |
// construct DCN net from network file |
|
|
233 |
// Note: there are two type of network files, attention should be paid that one file contains |
|
|
234 |
// additional abbreviation of SOC. Consequently, the second entry should be in column 6 instead 5 |
|
|
235 |
BufferedReader br1 = new BufferedReader(new FileReader(new File(commnetfile))); |
|
|
236 |
String line1 = null; |
|
|
237 |
while ((line1 = br1.readLine()) != null) { |
|
|
238 |
String [] s = line1.split("\\|"); |
|
|
239 |
// System.out.println(s[0]); |
|
|
240 |
// System.out.println(s[0]); |
|
|
241 |
if (entry_index.containsKey(s[0]) && entry_index.containsKey(s[5])) { |
|
|
242 |
int entry1 = entry_index.get(s[0]); |
|
|
243 |
int entry2 = entry_index.get(s[5]); |
|
|
244 |
// double score = Integer.parseInt(s[2]); |
|
|
245 |
cgg.addEdge(entry1, entry2); |
|
|
246 |
|
|
|
247 |
} |
|
|
248 |
} |
|
|
249 |
br1.close(); |
|
|
250 |
|
|
|
251 |
// Check if diagonal entry is 1 |
|
|
252 |
// for (int j=0; j<disease_list.size(); j++) { |
|
|
253 |
// System.out.println(cgg.getNet().get(j, j)); |
|
|
254 |
// } |
|
|
255 |
|
|
|
256 |
|
|
|
257 |
BufferedReader br2 = new BufferedReader(new FileReader(new File(ppifile))); |
|
|
258 |
String line2 = null; |
|
|
259 |
while ((line2 = br2.readLine()) != null) { |
|
|
260 |
String [] s = line2.split("\\|"); |
|
|
261 |
// System.out.println(s[0]); |
|
|
262 |
if (entry_index.containsKey(s[0]) && entry_index.containsKey(s[1])) { |
|
|
263 |
int entry1 = entry_index.get(s[0]); |
|
|
264 |
int entry2 = entry_index.get(s[1]); |
|
|
265 |
// System.out.println(s[0] +" " + entry1); |
|
|
266 |
// System.out.println(s[1] +" " + entry2); |
|
|
267 |
// double score = Integer.parseInt(s[2]); |
|
|
268 |
cgg.addEdge(entry1, entry2); |
|
|
269 |
} |
|
|
270 |
} |
|
|
271 |
br2.close(); |
|
|
272 |
|
|
|
273 |
// Check if diagonal entry is 1 |
|
|
274 |
// for (int j=disease_list.size(); j<entry_list.size(); j++) { |
|
|
275 |
// System.out.println(cgg.getNet().get(j, j)); |
|
|
276 |
// } |
|
|
277 |
|
|
|
278 |
BufferedReader br3 = new BufferedReader(new FileReader(new File(disgenefile))); |
|
|
279 |
String line3 = null; |
|
|
280 |
Set<String> mappedset = new HashSet<>(); |
|
|
281 |
|
|
|
282 |
while ((line3 = br3.readLine()) != null) { |
|
|
283 |
String [] s = line3.split("\\|"); |
|
|
284 |
// System.out.println(s[0]); |
|
|
285 |
String disid = s[0]; |
|
|
286 |
String gene = s[1]; |
|
|
287 |
if (entry_index.containsKey(disid) && entry_index.containsKey(gene)) { |
|
|
288 |
mappedset.add(disid); |
|
|
289 |
int entry1 = entry_index.get(disid); |
|
|
290 |
int entry2 = entry_index.get(gene); |
|
|
291 |
// System.out.println(s[0] +" " + entry1); |
|
|
292 |
// System.out.println(s[1] +" " + entry2); |
|
|
293 |
// double score = Integer.parseInt(s[2]); |
|
|
294 |
cgg.addEdge(entry1, entry2); |
|
|
295 |
} |
|
|
296 |
|
|
|
297 |
// add additional dcn_omim map |
|
|
298 |
for (String id : dcn_omim.keySet()) { |
|
|
299 |
List<String> omim_ids = dcn_omim.get(id); |
|
|
300 |
if (!entry_index.containsKey(disid) && omim_ids.contains(disid)) { |
|
|
301 |
disid = id; // update id |
|
|
302 |
if (entry_index.containsKey(disid) && entry_index.containsKey(gene)) { |
|
|
303 |
int entry1 = entry_index.get(disid); |
|
|
304 |
int entry2 = entry_index.get(gene); |
|
|
305 |
cgg.addEdge(entry1, entry2); |
|
|
306 |
} |
|
|
307 |
} |
|
|
308 |
} |
|
|
309 |
|
|
|
310 |
} |
|
|
311 |
br3.close(); |
|
|
312 |
|
|
|
313 |
// Check if diagonal entry is 1 |
|
|
314 |
// for (int j=disease_list.size(); j<entry_list.size(); j++) { |
|
|
315 |
// System.out.println(cgg.getNet().get(j, j)); |
|
|
316 |
// } |
|
|
317 |
|
|
|
318 |
System.out.println("Total mapped disease: " + mappedset.size()); |
|
|
319 |
|
|
|
320 |
// for (int i=0; i<n; i++) { |
|
|
321 |
// cgg.addEdge(i, i); |
|
|
322 |
// } |
|
|
323 |
return cgg; |
|
|
324 |
} |
|
|
325 |
|
|
|
326 |
/** |
|
|
327 |
* Build a bipartite graph including DCN, DMN and PPI |
|
|
328 |
* @param commnetfile DCN file |
|
|
329 |
* @param dmnfile DMN file |
|
|
330 |
* @param ppifile PPI file |
|
|
331 |
* @param disgenefile disease genetics file from OMIM |
|
|
332 |
* @return a bipartite graph |
|
|
333 |
* @throws IOException |
|
|
334 |
*/ |
|
|
335 |
public static DisGraph createCommGeneticsGraph(String commnetfile, String dmnfile, |
|
|
336 |
String ppifile, String disgenefile) throws IOException { |
|
|
337 |
|
|
|
338 |
|
|
|
339 |
readCommNet(commnetfile); |
|
|
340 |
readDMN(dmnfile); |
|
|
341 |
readPPI(ppifile); |
|
|
342 |
disease_set.addAll(DCN_set); |
|
|
343 |
disease_set.addAll(DMN_set); |
|
|
344 |
disease_list = util.CollectionsEx.setToList(disease_set); |
|
|
345 |
entry_list.addAll(disease_list); |
|
|
346 |
entry_list.addAll(gene_list); |
|
|
347 |
createIndex(entry_list); |
|
|
348 |
num_disease = disease_list.size(); |
|
|
349 |
|
|
|
350 |
int n = entry_index.size(); |
|
|
351 |
DisGraph cgg = new DisGraph(n); |
|
|
352 |
// System.out.println(n); |
|
|
353 |
|
|
|
354 |
BufferedReader br1 = new BufferedReader(new FileReader(new File(commnetfile))); |
|
|
355 |
String line1 = null; |
|
|
356 |
while ((line1 = br1.readLine()) != null) { |
|
|
357 |
String [] s = line1.split("\\|"); |
|
|
358 |
// System.out.println(s[0]); |
|
|
359 |
// System.out.println(s[0]); |
|
|
360 |
if (entry_index.containsKey(s[0]) && entry_index.containsKey(s[4])) { |
|
|
361 |
int entry1 = entry_index.get(s[0]); |
|
|
362 |
int entry2 = entry_index.get(s[4]); |
|
|
363 |
// double score = Integer.parseInt(s[2]); |
|
|
364 |
cgg.addEdge(entry2, entry1); |
|
|
365 |
} |
|
|
366 |
} |
|
|
367 |
br1.close(); |
|
|
368 |
|
|
|
369 |
BufferedReader br2 = new BufferedReader(new FileReader(new File(dmnfile))); |
|
|
370 |
String line2 = null; |
|
|
371 |
while ((line2 = br2.readLine()) != null) { |
|
|
372 |
String [] s = line2.split("\\|"); |
|
|
373 |
// System.out.println(s[0]); |
|
|
374 |
// System.out.println(s[0]); |
|
|
375 |
if (entry_index.containsKey(s[0]) && entry_index.containsKey(s[1])) { |
|
|
376 |
int entry1 = entry_index.get(s[0]); |
|
|
377 |
int entry2 = entry_index.get(s[1]); |
|
|
378 |
// double score = Integer.parseInt(s[2]); |
|
|
379 |
cgg.addEdge(entry1, entry2); |
|
|
380 |
|
|
|
381 |
} |
|
|
382 |
} |
|
|
383 |
br2.close(); |
|
|
384 |
|
|
|
385 |
BufferedReader br3 = new BufferedReader(new FileReader(new File(ppifile))); |
|
|
386 |
String line3 = null; |
|
|
387 |
while ((line3 = br3.readLine()) != null) { |
|
|
388 |
String [] s = line3.split("\\|"); |
|
|
389 |
// System.out.println(s[0]); |
|
|
390 |
if (entry_index.containsKey(s[0]) && entry_index.containsKey(s[1])) { |
|
|
391 |
int entry1 = entry_index.get(s[0]); |
|
|
392 |
int entry2 = entry_index.get(s[1]); |
|
|
393 |
// System.out.println(s[0] +" " + entry1); |
|
|
394 |
// System.out.println(s[1] +" " + entry2); |
|
|
395 |
// double score = Integer.parseInt(s[2]); |
|
|
396 |
cgg.addEdge(entry1, entry2); |
|
|
397 |
} |
|
|
398 |
} |
|
|
399 |
br3.close(); |
|
|
400 |
|
|
|
401 |
BufferedReader br4 = new BufferedReader(new FileReader(new File(disgenefile))); |
|
|
402 |
String line4 = null; |
|
|
403 |
Set<String> mappedset = new HashSet<>(); |
|
|
404 |
|
|
|
405 |
while ((line4 = br4.readLine()) != null) { |
|
|
406 |
String [] s = line4.split("\\|"); |
|
|
407 |
// System.out.println(s[0]); |
|
|
408 |
String disid = s[0]; |
|
|
409 |
String gene = s[1]; |
|
|
410 |
if (entry_index.containsKey(disid) && entry_index.containsKey(gene)) { |
|
|
411 |
mappedset.add(disid); |
|
|
412 |
int entry1 = entry_index.get(disid); |
|
|
413 |
int entry2 = entry_index.get(s[1]); |
|
|
414 |
// System.out.println(s[0] +" " + entry1); |
|
|
415 |
// System.out.println(s[1] +" " + entry2); |
|
|
416 |
// double score = Integer.parseInt(s[2]); |
|
|
417 |
cgg.addEdge(entry1, entry2); |
|
|
418 |
} |
|
|
419 |
|
|
|
420 |
} |
|
|
421 |
br4.close(); |
|
|
422 |
|
|
|
423 |
System.out.println("Total mapped disease: " + mappedset.size()); |
|
|
424 |
|
|
|
425 |
for (int i=0; i<n; i++) { |
|
|
426 |
cgg.addEdge(i, i); |
|
|
427 |
} |
|
|
428 |
return cgg; |
|
|
429 |
} |
|
|
430 |
|
|
|
431 |
/** |
|
|
432 |
* Build a bipartite graph including DCN and PPI, extra mapping and DMN are optional |
|
|
433 |
* |
|
|
434 |
* @param commnetfile DCN file |
|
|
435 |
* @param dmnfile DMN file |
|
|
436 |
* @param ppifile PPI file |
|
|
437 |
* @param disgenefile disease genetics file from OMIM |
|
|
438 |
* @param dcn_omim mapping from DCN disease to OMIM disease |
|
|
439 |
* @param dmn a flag to indicate if DMN will be included |
|
|
440 |
* @param extra a flag to indicate if extra mapping will be included |
|
|
441 |
* @return a bipartite graph |
|
|
442 |
* @throws IOException |
|
|
443 |
*/ |
|
|
444 |
public static DisGraph createCommGeneticsGraph(String commnetfile, String dmnfile, |
|
|
445 |
String ppifile, String disgenefile, Map<String, List<String>> dcn_omim, Boolean dmn, Boolean extra) throws IOException { |
|
|
446 |
|
|
|
447 |
|
|
|
448 |
readCommNet(commnetfile); |
|
|
449 |
disease_set.addAll(DCN_set); |
|
|
450 |
if(dmn) { |
|
|
451 |
readDMN(dmnfile); |
|
|
452 |
disease_set.addAll(DMN_set); |
|
|
453 |
} |
|
|
454 |
disease_list = util.CollectionsEx.setToList(disease_set); |
|
|
455 |
readPPI(ppifile); |
|
|
456 |
|
|
|
457 |
entry_list.addAll(disease_list); |
|
|
458 |
entry_list.addAll(gene_list); |
|
|
459 |
createIndex(entry_list); |
|
|
460 |
num_disease = disease_list.size(); |
|
|
461 |
|
|
|
462 |
int n = entry_index.size(); |
|
|
463 |
DisGraph cgg = new DisGraph(n); |
|
|
464 |
// System.out.println(n); |
|
|
465 |
|
|
|
466 |
BufferedReader br1 = new BufferedReader(new FileReader(new File(commnetfile))); |
|
|
467 |
String line1 = null; |
|
|
468 |
while ((line1 = br1.readLine()) != null) { |
|
|
469 |
String [] s = line1.split("\\|"); |
|
|
470 |
// System.out.println(s[0]); |
|
|
471 |
// System.out.println(s[0]); |
|
|
472 |
if (entry_index.containsKey(s[0]) && entry_index.containsKey(s[4])) { |
|
|
473 |
int entry1 = entry_index.get(s[0]); |
|
|
474 |
int entry2 = entry_index.get(s[4]); |
|
|
475 |
// double score = Integer.parseInt(s[2]); |
|
|
476 |
cgg.addEdge(entry2, entry1); |
|
|
477 |
} |
|
|
478 |
} |
|
|
479 |
br1.close(); |
|
|
480 |
|
|
|
481 |
if(dmn) { |
|
|
482 |
BufferedReader br2 = new BufferedReader(new FileReader(new File(dmnfile))); |
|
|
483 |
String line2 = null; |
|
|
484 |
while ((line2 = br2.readLine()) != null) { |
|
|
485 |
String [] s = line2.split("\\|"); |
|
|
486 |
// System.out.println(s[0]); |
|
|
487 |
// System.out.println(s[0]); |
|
|
488 |
if (entry_index.containsKey(s[0]) && entry_index.containsKey(s[1])) { |
|
|
489 |
int entry1 = entry_index.get(s[0]); |
|
|
490 |
int entry2 = entry_index.get(s[1]); |
|
|
491 |
// double score = Integer.parseInt(s[2]); |
|
|
492 |
cgg.addEdge(entry1, entry2); |
|
|
493 |
|
|
|
494 |
} |
|
|
495 |
} |
|
|
496 |
br2.close(); |
|
|
497 |
} |
|
|
498 |
|
|
|
499 |
|
|
|
500 |
BufferedReader br3 = new BufferedReader(new FileReader(new File(ppifile))); |
|
|
501 |
String line3 = null; |
|
|
502 |
while ((line3 = br3.readLine()) != null) { |
|
|
503 |
String [] s = line3.split("\\|"); |
|
|
504 |
// System.out.println(s[0]); |
|
|
505 |
if (entry_index.containsKey(s[0]) && entry_index.containsKey(s[1])) { |
|
|
506 |
int entry1 = entry_index.get(s[0]); |
|
|
507 |
int entry2 = entry_index.get(s[1]); |
|
|
508 |
// System.out.println(s[0] +" " + entry1); |
|
|
509 |
// System.out.println(s[1] +" " + entry2); |
|
|
510 |
// double score = Integer.parseInt(s[2]); |
|
|
511 |
cgg.addEdge(entry1, entry2); |
|
|
512 |
} |
|
|
513 |
} |
|
|
514 |
br3.close(); |
|
|
515 |
|
|
|
516 |
BufferedReader br4 = new BufferedReader(new FileReader(new File(disgenefile))); |
|
|
517 |
String line4 = null; |
|
|
518 |
Set<String> mappedset = new HashSet<>(); |
|
|
519 |
|
|
|
520 |
while ((line4 = br4.readLine()) != null) { |
|
|
521 |
String [] s = line4.split("\\|"); |
|
|
522 |
// System.out.println(s[0]); |
|
|
523 |
String disid = s[0]; |
|
|
524 |
String gene = s[1]; |
|
|
525 |
if (entry_index.containsKey(disid) && entry_index.containsKey(gene)) { |
|
|
526 |
mappedset.add(disid); |
|
|
527 |
int entry1 = entry_index.get(disid); |
|
|
528 |
int entry2 = entry_index.get(gene); |
|
|
529 |
// System.out.println(s[0] +" " + entry1); |
|
|
530 |
// System.out.println(s[1] +" " + entry2); |
|
|
531 |
// double score = Integer.parseInt(s[2]); |
|
|
532 |
cgg.addEdge(entry1, entry2); |
|
|
533 |
} |
|
|
534 |
|
|
|
535 |
// Additional mapping according to biological meaning |
|
|
536 |
|
|
|
537 |
// if (!entry_index.containsKey(disid) && extra) { |
|
|
538 |
// for (String id : dcn_omim.keySet()) { |
|
|
539 |
// List<String> omim_ids = dcn_omim.get(id); |
|
|
540 |
// if (omim_ids.contains(disid)) { |
|
|
541 |
// disid = id; // update id |
|
|
542 |
// if (entry_index.containsKey(disid) && entry_index.containsKey(gene)) { |
|
|
543 |
// int entry1 = entry_index.get(disid); |
|
|
544 |
// int entry2 = entry_index.get(gene); |
|
|
545 |
// cgg.addEdge(entry1, entry2); |
|
|
546 |
// } |
|
|
547 |
// } |
|
|
548 |
// } |
|
|
549 |
|
|
|
550 |
if(extra) { |
|
|
551 |
for (String id : dcn_omim.keySet()) { |
|
|
552 |
List<String> omim_ids = dcn_omim.get(id); |
|
|
553 |
if (!entry_index.containsKey(disid) && omim_ids.contains(disid)) { |
|
|
554 |
disid = id; // update id |
|
|
555 |
if (entry_index.containsKey(disid) && entry_index.containsKey(gene)) { |
|
|
556 |
int entry1 = entry_index.get(disid); |
|
|
557 |
int entry2 = entry_index.get(gene); |
|
|
558 |
cgg.addEdge(entry1, entry2); |
|
|
559 |
} |
|
|
560 |
} |
|
|
561 |
} |
|
|
562 |
} |
|
|
563 |
} |
|
|
564 |
br4.close(); |
|
|
565 |
|
|
|
566 |
System.out.println("Total mapped disease: " + mappedset.size()); |
|
|
567 |
|
|
|
568 |
for (int i=0; i<n; i++) { |
|
|
569 |
cgg.addEdge(i, i); |
|
|
570 |
} |
|
|
571 |
return cgg; |
|
|
572 |
} |
|
|
573 |
|
|
|
574 |
public static DisGraph createCommGeneticsGraph(DisGraph dg, DisGraph pp, |
|
|
575 |
Map<Integer, List<Integer>> dis_genelist) { |
|
|
576 |
int N1 = dg.getNodes(); |
|
|
577 |
int N2 = pp.getNodes(); |
|
|
578 |
DisGraph cgg = new DisGraph(N1 + N2); |
|
|
579 |
|
|
|
580 |
//add disease comorbidity network |
|
|
581 |
for (int i=0; i<N1; i++) { |
|
|
582 |
Set<Integer> neibors = dg.getNeibor(i).getKeys(); |
|
|
583 |
for (int neibor : neibors) { |
|
|
584 |
cgg.addEdge(i, neibor); |
|
|
585 |
} |
|
|
586 |
} |
|
|
587 |
|
|
|
588 |
//add protein-protein interaction network |
|
|
589 |
for (int i=0; i<N2; i++) { |
|
|
590 |
Set<Integer> neibors = pp.getNeibor(i).getKeys(); |
|
|
591 |
for (int neibor : neibors) { |
|
|
592 |
int idx1 = i + N1; |
|
|
593 |
int idx2 = neibor + N1; |
|
|
594 |
cgg.addEdge(idx1, idx2); |
|
|
595 |
} |
|
|
596 |
} |
|
|
597 |
|
|
|
598 |
//add disease-gene connection |
|
|
599 |
for (int dis_idx: dis_genelist.keySet()) { |
|
|
600 |
for (int gene_idx: dis_genelist.get(dis_idx)) { |
|
|
601 |
cgg.addEdge(dis_idx, gene_idx + N1); |
|
|
602 |
} |
|
|
603 |
} |
|
|
604 |
|
|
|
605 |
|
|
|
606 |
return cgg; |
|
|
607 |
} |
|
|
608 |
|
|
|
609 |
|
|
|
610 |
/** |
|
|
611 |
* Creates a UMLS-name mapping |
|
|
612 |
* @param mapfiles mapping file |
|
|
613 |
* @throws IOException |
|
|
614 |
*/ |
|
|
615 |
public static void createIdNameMap(List<String> mapfiles) throws IOException { |
|
|
616 |
|
|
|
617 |
for (String mapfile : mapfiles) { |
|
|
618 |
BufferedReader br = new BufferedReader(new FileReader(mapfile)); |
|
|
619 |
String line = null; |
|
|
620 |
while((line = br.readLine()) != null){ |
|
|
621 |
String[] parts = line.split("\\|"); |
|
|
622 |
idnamemap.put(parts[0], parts[1].toLowerCase()); |
|
|
623 |
nameidmap.put(parts[1].toLowerCase(), parts[0]); |
|
|
624 |
} |
|
|
625 |
br.close(); |
|
|
626 |
} |
|
|
627 |
|
|
|
628 |
} |
|
|
629 |
|
|
|
630 |
/** |
|
|
631 |
* Create UMLS-name mapping for OMIM |
|
|
632 |
* @param mapfiles |
|
|
633 |
* @throws IOException |
|
|
634 |
*/ |
|
|
635 |
public static void createOMIMIdNameMap(List<String> mapfiles) throws IOException { |
|
|
636 |
|
|
|
637 |
for (String mapfile : mapfiles) { |
|
|
638 |
BufferedReader br = new BufferedReader(new FileReader(mapfile)); |
|
|
639 |
String line = null; |
|
|
640 |
while((line = br.readLine()) != null){ |
|
|
641 |
String[] parts = line.split("\\|"); |
|
|
642 |
OMIMidnamemap.put(parts[1], parts[0].toLowerCase()); |
|
|
643 |
OMIMnameidmap.put(parts[0].toLowerCase(), parts[1]); |
|
|
644 |
} |
|
|
645 |
br.close(); |
|
|
646 |
} |
|
|
647 |
|
|
|
648 |
} |
|
|
649 |
|
|
|
650 |
/** |
|
|
651 |
* Write the bipartite graph to a file, format is "node1|node2|score" |
|
|
652 |
* @param cgg the bipartite file |
|
|
653 |
* @param filename a file to be written |
|
|
654 |
* @throws IOException |
|
|
655 |
*/ |
|
|
656 |
private static void writeCommGeneticsNet(DisGraph cgg, String filename) throws IOException { |
|
|
657 |
BufferedWriter bw = new BufferedWriter(new FileWriter(new File(filename))); |
|
|
658 |
for (int i = 0; i < cgg.getNet().size(); i++) { |
|
|
659 |
Set<Integer> sv = cgg.getNet().getKey(i); |
|
|
660 |
for (int k : sv) { |
|
|
661 |
String d1 = index_entry.get(i); |
|
|
662 |
String d2 = index_entry.get(k); |
|
|
663 |
double score = cgg.getNet().get(i, k); |
|
|
664 |
bw.write(d1 + "|" + d2 + "|" + score + "|" + "\n"); |
|
|
665 |
} |
|
|
666 |
|
|
|
667 |
} |
|
|
668 |
bw.close(); |
|
|
669 |
} |
|
|
670 |
|
|
|
671 |
|
|
|
672 |
|
|
|
673 |
public static void main(String[] args) throws IOException { |
|
|
674 |
|
|
|
675 |
|
|
|
676 |
// ######################################################################### |
|
|
677 |
// ----------------------- Method 1: Build network from files -------------------- |
|
|
678 |
// ######################################################################### |
|
|
679 |
|
|
|
680 |
/* |
|
|
681 |
* Create additional DCN_OMIM map for give disease |
|
|
682 |
*/ |
|
|
683 |
|
|
|
684 |
// String DCN_dis = "dementia"; |
|
|
685 |
// String OMIM_pat = "alzheimer"; |
|
|
686 |
// |
|
|
687 |
// DCNOMIMUMLSIDmap domim = new DCNOMIMUMLSIDmap(DCN_dis, OMIM_pat); |
|
|
688 |
// Map<String, List<String>> dcn_omim = domim.getDCNOMIMUMLSIDmap(); |
|
|
689 |
// |
|
|
690 |
// /* |
|
|
691 |
// * Build the bipartite network |
|
|
692 |
// */ |
|
|
693 |
//// String commnetfile = "/Users/zhengc/Projects/AD_comorbidity/data/fares_comm_net_conf_ISMB_final_public.txt"; |
|
|
694 |
//// String dmnfile = "/Users/zhengc/Projects/AD_comorbidity/data/dmn_dm.txt"; |
|
|
695 |
//// String ppifile = "/Users/zhengc/Projects/AD_comorbidity/data/gene_gene_string_cut.txt"; |
|
|
696 |
//// String disgenefile = "/Users/zhengc/workspace/FARES_final/data/OMIM/mapped_OMIM/OMIM_disease_gene_umls_id_diso"; |
|
|
697 |
// |
|
|
698 |
// String commnetfile = "/Users/zhengc/workspace/FARES_final/analysis/network/DCN/fares_comm_net_lift_final_abbr.txt";; |
|
|
699 |
//// String dmnfile = "/Users/zhengc/workspace/FARES_final/analysis/AD_comorbidity/data/dmn_dm.txt"; |
|
|
700 |
// String ppifile = "/Users/zhengc/workspace/FARES_final/analysis/AD_comorbidity/data/gene_gene_string_cut.txt"; |
|
|
701 |
// String disgenefile = "/Users/zhengc/workspace/FARES_final/data/OMIM/mapped_OMIM/OMIM_disease_gene_umls_id_diso"; |
|
|
702 |
// |
|
|
703 |
// |
|
|
704 |
// DisGraph cgg = null; |
|
|
705 |
// try { |
|
|
706 |
//// cgg = CommGeneticsLoader.createCommGeneticsGraph(commnetfile, dmnfile, ppifile, disgenefile, dcn_omim); |
|
|
707 |
// cgg = CommGeneticsLoader.createCommGeneticsGraph(commnetfile, ppifile, disgenefile, dcn_omim); |
|
|
708 |
// System.out.println("Total edges: " + cgg.getEdges()); |
|
|
709 |
// } catch (IOException e) { |
|
|
710 |
// // TODO Auto-generated catch block |
|
|
711 |
// e.printStackTrace(); |
|
|
712 |
// } |
|
|
713 |
// |
|
|
714 |
// |
|
|
715 |
// |
|
|
716 |
// /* Write commgenetics file */ |
|
|
717 |
// String netfile = "/Users/zhengc/workspace/FARES_final/analysis/network/DCN_PPI/DCN_PPI_net_lift.txt"; |
|
|
718 |
// writeCommGeneticsNet(cgg, netfile); |
|
|
719 |
// |
|
|
720 |
// String disUMLS_name_file = "/Users/zhengc/workspace/FARES_final/analysis/network/DCN_PPI/disUMLS_name_lift.txt"; |
|
|
721 |
// writeDCNIdName(commnetfile, disUMLS_name_file); |
|
|
722 |
// |
|
|
723 |
// String dcnmapfile = "/Users/zhengc/workspace/FARES_final/data/FARES/map/term_umls_id_diso"; |
|
|
724 |
// util.DCNOMIMUMLSIDmap.createDCNIdNameMap(dcnmapfile); |
|
|
725 |
// String diseaseid = util.DCNOMIMUMLSIDmap.dcnnameidmap.get(DCN_dis); |
|
|
726 |
// System.out.println(diseaseid); |
|
|
727 |
// int disease_idx = entry_index.get(diseaseid); |
|
|
728 |
// System.out.println(disease_idx); |
|
|
729 |
// System.out.println(cgg.getNeibor(disease_idx)); |
|
|
730 |
// |
|
|
731 |
// Map<String, Double> commDisMap = ComSearch.searchFaers2(diseaseid, cgg); |
|
|
732 |
// System.out.println("\nNeibor node method......"); |
|
|
733 |
// System.out.println("Number of comorbidities found: " + commDisMap.size() + "\n"); |
|
|
734 |
// for (String dis : commDisMap.keySet()) { |
|
|
735 |
// System.out.println(dis); |
|
|
736 |
// } |
|
|
737 |
|
|
|
738 |
|
|
|
739 |
// ######################################################################### |
|
|
740 |
// --------------------- Method 2: Build network from network ------------- |
|
|
741 |
// ######################################################################### |
|
|
742 |
|
|
|
743 |
String dcnmapfile = "/Users/zhengc/workspace/FARES_final/data/FARES/map/term_umls_id_diso"; |
|
|
744 |
String omimmapfile = "/Users/zhengc/workspace/FARES_final/data/OMIM/map/OMIM_umls_id_diso"; |
|
|
745 |
util.DCNOMIMUMLSIDmap.createDCNIdNameMap(dcnmapfile); |
|
|
746 |
util.DCNOMIMUMLSIDmap.createOMIMIdNameMap(omimmapfile); |
|
|
747 |
|
|
|
748 |
String DCN_dis = "dementia"; |
|
|
749 |
String OMIM_pat = "alzheimer"; |
|
|
750 |
|
|
|
751 |
DCNOMIMUMLSIDmap domim = new DCNOMIMUMLSIDmap(DCN_dis, OMIM_pat); |
|
|
752 |
Map<String, List<String>> dcn_omim = domim.getDCNOMIMUMLSIDmap(); |
|
|
753 |
for (String dis: dcn_omim.keySet()) { |
|
|
754 |
System.out.println(dcn_omim.get(dis)); |
|
|
755 |
} |
|
|
756 |
|
|
|
757 |
/* |
|
|
758 |
* Build heterogenous network |
|
|
759 |
*/ |
|
|
760 |
|
|
|
761 |
String rulefile = "./data/ID_indications_all_clean_width_umls_id_diso_filtered_sp_lift_1_0.000002_3_processed.txt"; |
|
|
762 |
String commnetfile = "./results/fares_comm_net_lift_final_abbr.txt";; |
|
|
763 |
String ppifile = "./data/gene_gene_string_cut.txt"; |
|
|
764 |
String disgenefile = "./data/OMIM_disease_gene_umls_id_diso"; |
|
|
765 |
|
|
|
766 |
CommLoader.readAssocRules(rulefile); |
|
|
767 |
DisGraph commnet = CommLoader.createCommNet(rulefile); |
|
|
768 |
CommLoader.readPPI(ppifile); |
|
|
769 |
DisGraph ppinet = CommLoader.createPPINet(ppifile); |
|
|
770 |
|
|
|
771 |
Map<Integer, List<Integer>> dis_gene = CommLoader.readDisGene(disgenefile, dcn_omim); |
|
|
772 |
|
|
|
773 |
|
|
|
774 |
DisGraph cgg2 = createCommGeneticsGraph(commnet, ppinet, dis_gene); |
|
|
775 |
System.out.println(cgg2.getEdges()); |
|
|
776 |
System.out.println(cgg2.getNodes()); |
|
|
777 |
|
|
|
778 |
String netfile = "./results/DCN_PPI/DCN_PPI_net_lift.txt"; |
|
|
779 |
writeCommGeneticsNet(cgg, netfile); |
|
|
780 |
|
|
|
781 |
String disUMLS_name_file = "./results/disUMLS_name_lift.txt"; |
|
|
782 |
writeDCNIdName(commnetfile, disUMLS_name_file); |
|
|
783 |
} |
|
|
784 |
} |