a b/network/CommLoader.java
1
package network;
2
3
import java.io.BufferedReader;
4
import java.io.BufferedWriter;
5
import java.io.File;
6
import java.io.FileNotFoundException;
7
import java.io.FileReader;
8
import java.io.FileWriter;
9
import java.io.IOException;
10
import java.util.ArrayList;
11
import java.util.Arrays;
12
import java.util.Collections;
13
import java.util.Comparator;
14
import java.util.HashMap;
15
import java.util.HashSet;
16
import java.util.LinkedHashMap;
17
import java.util.LinkedList;
18
import java.util.List;
19
import java.util.Map;
20
import java.util.Set;
21
22
import GraphAlgorithm.GraphGenerator;
23
import GraphAlgorithm.RandGraph;
24
25
/**
 * Provides methods for building a disease comorbidity network (DCN) from FAERS
 * and saving it to a file.
 *
 * @author zhengc
 *
 */
32
public class CommLoader {
33
    public static HashSet<String> disease_set = new HashSet<String>();
34
    public static HashMap<String, Integer> disease_index = new HashMap<String, Integer>();
35
    public static HashMap<Integer, String> index_disease = new HashMap<Integer, String>();
36
    public static Map<String, String> idnamemap = new HashMap<String, String>();
37
    public static Map<String, String> nameidmap = new HashMap<String, String>();
38
    public static HashSet<String> soc_set = new HashSet<String>();
39
    public static Map<String, String> umlssocmap = new HashMap<String, String>();
40
    public static Map<String, String> umlssocabbrmap = new HashMap<String, String>();
41
    public static Map<String, Integer> soc_idx = new HashMap<String, Integer>();
42
    public static Map<Integer, String> idx_soc = new HashMap<Integer,String>();
43
    public static final double P = 0.85;
44
    
45
    public static HashSet<String> gene_set = new HashSet<String>();
46
    public static List<String> gene_list = new ArrayList<String>();
47
    public static HashMap<String, Integer> gene_index = new HashMap<String, Integer>();
48
    public static HashMap<Integer, String> index_gene = new HashMap<Integer, String>();
49
    
50
    
51
    
52
    /**
53
     * Reads an association rule file to create necessary map file for building DCN
54
     * @param filename an association rule file 
55
     * @throws IOException
56
     */
57
    public static void readAssocRules(String filename) throws IOException{
58
        // get all unique disease set
59
        BufferedReader br = new BufferedReader(new FileReader(new File(filename)));
60
        String line = br.readLine();
61
        while ((line = br.readLine()) != null) {
62
            line = line.replace("[", "");
63
            line = line.replace("]", "");
64
//          System.out.println(line);
65
            String[] symptoms = line.split("\\|")[0].split(",");
66
//          System.out.println(symptoms.length);
67
            String[] diseases = line.split("\\|")[2].split(",");
68
            
69
            for (int i = 0; i < symptoms.length; i++) {
70
                symptoms[i] = symptoms[i].trim();
71
                disease_set.add(symptoms[i]);
72
            }
73
            for (int j = 0; j < diseases.length; j++) {
74
                diseases[j] = diseases[j].trim();
75
                disease_set.add(diseases[j]);
76
            }
77
        }
78
        br.close();
79
        
80
        int m = 0; 
81
        for (String d: disease_set) {
82
                disease_index.put(d, m);
83
                index_disease.put(m, d);
84
                m++;
85
        }
86
    }
87
    
88
    public static void readUMLSSOC(String filename) throws IOException{
89
        BufferedReader br = new BufferedReader(new FileReader(new File(filename)));
90
        String line = null;
91
        while ((line=br.readLine()) != null) {
92
            String[] parts = line.split("\\$");
93
            String umls = parts[0];
94
            String soc = parts[8];
95
            String soc_abbr = parts[9];
96
            soc_set.add(soc);
97
            umlssocmap.put(umls, soc);
98
            umlssocabbrmap.put(umls, soc_abbr);
99
        }
100
        br.close();
101
        
102
        int m = 0;
103
        for (String soc : soc_set) {
104
            soc_idx.put(soc, m);
105
            idx_soc.put(m, soc);
106
            m++;
107
        }
108
        
109
    }
110
    
111
    
112
    public static void readPPI(String filename) throws IOException{
113
        BufferedReader br = new BufferedReader(new FileReader(new File(filename)));
114
        String line = null;
115
        while ((line=br.readLine()) != null) {
116
            String gene = line.split("\\|")[0];
117
//          System.out.println(2);
118
            gene_set.add(gene);
119
        }
120
        br.close();
121
        gene_list = util.CollectionsEx.setToList(gene_set);
122
        
123
        int m = 0; 
124
        for (String d: gene_set) {
125
                gene_index.put(d, m);
126
                index_gene.put(m, d);
127
                m++;
128
        }
129
        
130
    }
131
    
132
    
133
    public static DisGraph createPPINet(String ppifile) throws IOException {
134
        File file = new File(ppifile);
135
        BufferedReader br = new BufferedReader(new FileReader(file));
136
        
137
        int n = gene_index.size();
138
        DisGraph ppinet = new DisGraph(n);
139
        
140
        String line = br.readLine();
141
        while ((line = br.readLine()) != null) {
142
            String[] parts = line.split("\\|");
143
            String dis1 = parts[0];
144
            int d1 = gene_index.get(dis1);
145
//          System.out.println(symptoms.length);
146
            String dis2 = parts[1];
147
            int d2 = gene_index.get(dis2);
148
            ppinet.addEdge(d1, d2);
149
        }
150
        br.close();
151
        return ppinet;
152
    }
153
    
154
    
155
    public static Map<Integer, List<Integer>> readDisGene(String disgenefile, 
156
            Map<String, List<String>> dcn_omim) throws IOException{
157
        
158
        Map<Integer, List<Integer>> dis_gene = new HashMap<>();
159
        BufferedReader br = new BufferedReader(new FileReader(new File(disgenefile)));
160
        String line = null;
161
        
162
        while ((line = br.readLine()) != null) {
163
            String [] s = line.split("\\|");
164
//          System.out.println(s[0]);
165
            String dis = s[0];
166
            String gene = s[1];
167
            if (disease_index.containsKey(dis)) {
168
                int dis_idx = disease_index.get(dis);
169
                if (gene_index.containsKey(gene)) {
170
                    int gene_idx = gene_index.get(gene);
171
                    if (!dis_gene.containsKey(dis_idx)) {
172
                        List<Integer> genes = new ArrayList<Integer>();
173
                        genes.add(gene_idx);
174
                        dis_gene.put(dis_idx, genes);
175
                    } else {
176
                        dis_gene.get(dis_idx).add(gene_idx);
177
                    }
178
                }
179
            } else {
180
                for (String id : dcn_omim.keySet()) {
181
                    List<String> omim_ids = dcn_omim.get(id);
182
                    if (omim_ids.contains(dis)) {
183
                        dis = id; // update id
184
//                      System.out.println(dis);
185
                        if (disease_index.containsKey(dis) && gene_index.containsKey(gene)) {
186
                            int dis_idx = disease_index.get(dis);
187
                            int gene_idx = gene_index.get(gene);
188
                            if (!dis_gene.containsKey(dis_idx)) {
189
                                List<Integer> genes = new ArrayList<Integer>();
190
                                genes.add(gene_idx);
191
                                dis_gene.put(dis_idx, genes);
192
                            } else {
193
                                dis_gene.get(dis_idx).add(gene_idx);
194
                            }
195
                        }
196
                    }
197
                }
198
            }
199
            
200
            // add additional dcn_omim map
201
            
202
            
203
            
204
//          for (String id : dcn_omim.keySet()) {
205
//              List<String> omim_ids = dcn_omim.get(id);
206
//              if (!disease_index.containsKey(dis) && omim_ids.contains(dis)) {
207
//                  dis = id; // update id
208
//                  if (disease_index.containsKey(dis) && gene_index.containsKey(gene)) {
209
//                      int dis_idx = disease_index.get(dis);
210
//                      int gene_idx = gene_index.get(gene);
211
//                      dis_gene.get(dis_idx).add(gene_idx);
212
//                  }
213
//              }
214
//          }
215
        }
216
        br.close();
217
        return dis_gene;
218
    } 
219
    
220
    /**
221
     * Builds a DCN from an association rule file
222
     * @param filename an association rule file
223
     * @return an undirected and unweighted DisGraph
224
     * @throws IOException
225
     */
226
    public static DisGraph createCommNet(String filename) throws IOException{
227
        
228
        File file = new File(filename);
229
        BufferedReader br = new BufferedReader(new FileReader(file));
230
        
231
        int n = disease_index.size();
232
        DisGraph commnet = new DisGraph(n);
233
        
234
        String line = br.readLine();
235
        while ((line = br.readLine()) != null) {
236
        
237
            line = line.replace("[", "");
238
            line = line.replace("]", "");
239
        
240
            //undirected, unweighted network 
241
         
242
            String[] disease1 = line.split("\\|")[0].split(",");
243
            List<String> diseases_raw = new ArrayList<String>(Arrays.asList(disease1));
244
            String[] disease2 = line.split("\\|")[2].split(",");
245
            diseases_raw.addAll(Arrays.asList(disease2));
246
            
247
            List<String> diseases = new ArrayList<String>();
248
            for (String d:diseases_raw) {
249
                d = d.trim();
250
                diseases.add(d);
251
            }
252
            
253
            for (int i=0; i<diseases.size() - 1; i++) {
254
                //System.out.println(diseases.get(i));
255
                int d1 = disease_index.get(diseases.get(i));
256
                for (int j=i+1; j<diseases.size(); j++) {
257
                    int d2 = disease_index.get(diseases.get(j));
258
                    //double conf = Double.parseDouble(line.split("\\|")[4]);
259
                    //commnet.addNode(d1, d2, conf);
260
                    commnet.addEdge(d1, d2);
261
                }
262
            }
263
            
264
        }
265
        br.close();
266
        
267
        return commnet;
268
    }
269
    
270
    
271
    public static DisGraph createRandNet(String filename) throws IOException {
272
        DisGraph commnet = createCommNet(filename);
273
        DisGraph randnet = (new RandGraph(commnet)).getRandGraph();
274
        return randnet;
275
    }
276
    
277
    
278
    public static DisGraph createERNet(String filename) throws IOException {
279
        DisGraph commnet = createCommNet(filename);
280
        int V = commnet.getNodes();
281
        double p = (double) V / (V*(V-1)/2);
282
        System.out.println("Probability: " + p);
283
        DisGraph randnet = GraphGenerator.simple(V, p);
284
        return randnet;
285
    }
286
    
287
    public static DisGraph createCommNetFromAdjFile(String netfile, String adjfile) throws IOException {
288
        DisGraph commnet = createCommNet(netfile);
289
        
290
        DisGraph adjnet = new DisGraph(commnet.getNodes());
291
        
292
        BufferedReader br = new BufferedReader(new FileReader(new File(adjfile)));
293
        String line = null;
294
        int num_line = 0;
295
        while ((line=br.readLine()) != null) {
296
            String[] parts = line.split("\\|");
297
            for (int i=0; i<parts.length; i++) {
298
                int entry = Integer.parseInt(parts[i]);
299
                if (entry==1) {
300
                    adjnet.addEdge(num_line, i);
301
                }
302
            }
303
            num_line++;
304
        }
305
        br.close();
306
        return adjnet;
307
    }
308
    
309
    
310
    public static List<String> getNeibors(String dis, DisGraph dg) {
311
        
312
        String dis_id = CommLoader.nameidmap.get(dis);
313
        int dis_idx = CommLoader.disease_index.get(dis_id);
314
        
315
        SparseVector sv = dg.getNeibor(dis_idx);
316
        List<String> neibors = new ArrayList<>();
317
        for (int j : sv.getKeys()) {
318
            String id = CommLoader.index_disease.get(j);
319
            neibors.add(CommLoader.idnamemap.get(id));
320
        }
321
        Collections.sort(neibors);
322
        return neibors;
323
    }
324
    
325
    public static Map <Integer, Integer> getDegDistri(DisGraph dg) {
326
        Map <Integer, Integer> dis_neibor = new HashMap<>();
327
        SparseMatrix commMatrix = dg.getNet();
328
        for (int i=0; i<commMatrix.size(); i++) {
329
            dis_neibor.put(i, commMatrix.getNeibor(i).nnz());
330
        }
331
        
332
        return dis_neibor;
333
    }
334
    
335
    public static List<String> compareGraph(DisGraph dg, DisGraph randgraph,  String disname1, String disname2) {
336
        
337
        List<String> neibors_original = getNeibors(disname1, dg);
338
        
339
        System.out.println("\nComorbidities of " + disname1 + " in original graph:" + neibors_original.size() + "\n");
340
        for (String d : neibors_original) {
341
            System.out.println(d);
342
        }
343
        
344
        
345
        List<String> neibors_random = getNeibors(disname2, randgraph);
346
        System.out.println("\nComorbidities of " + disname2 + " in random graph:" + neibors_random.size() + "\n" );
347
        for (String d : neibors_random) {
348
            System.out.println(d);
349
        }
350
        
351
        List<String> neibors_common = new ArrayList<>();
352
        if (neibors_original.size() >= neibors_random.size()) {
353
            for (String j: neibors_original) {
354
                if (neibors_random.contains(j)) {
355
                    neibors_common.add(j);
356
                }
357
            }
358
        } else {
359
            for (String j: neibors_random) {
360
                if (neibors_original.contains(j)) {
361
                    neibors_common.add(j);
362
                }
363
            }
364
        }
365
        
366
        Collections.sort(neibors_common);
367
        System.out.println("\nShared comorbidities of " + disname1 + "and " + disname2 + 
368
                " between original and random graphs :" + neibors_common.size() + "\n" );
369
        for (String d : neibors_common) {
370
            System.out.println(d);
371
        }
372
        return neibors_common;
373
    }
374
    
375
    
376
    /**
377
     * Write network to a txt file
378
     * @param omimnet a DisGraph
379
     * @param filename a file to be written
380
     * @throws IOException
381
     * dis1_UMLS| dis1_name|dis1_SOC|dis1_SOC_idx|dis2_UMLS|dis2_name|dis2_SOC|dis2_SOC_idx|conf
382
     */
383
    
384
    public static void writeCommNet(DisGraph commnet, String filename) throws IOException {
385
        BufferedWriter bw = new BufferedWriter(new FileWriter(new File(filename)));
386
        bw.write("#dis1_UMLS" + "|" + "dis1_name" + "|"+ "dis1_SOC" + "|" + "dis1_SOC_abbr" + "|" +  "dis1_SOC_idx" + "|" + 
387
        "dis2_UMLS" + "|" + "dis2_name" + "|"+ "dis2_SOC" + "|" +  "dis2_SOC_abbr" + "|" + "dis2_SOC_idx" + "|" + "conf" + "\n");
388
        for (int i = 0; i < commnet.getNet().size(); i++) {
389
            String d1 = index_disease.get(i);
390
            String d1_name = idNameConverter(d1, idnamemap);
391
            String d1_soc = umlssocmap.get(d1);
392
            String d1_soc_abbr = umlssocabbrmap.get(d1);
393
            int d1_soc_idx = 0;
394
            if (soc_idx.keySet().contains(d1_soc)) {
395
                d1_soc_idx = soc_idx.get(d1_soc);
396
            } else {
397
                d1_soc_idx = soc_idx.size();
398
            }
399
            
400
            Set<Integer> sv = commnet.getNet().getKey(i);
401
            for (int k : sv) {
402
                String d2 = index_disease.get(k);
403
                String d2_name = idNameConverter(d2, idnamemap);
404
                String d2_soc = umlssocmap.get(d2);
405
                String d2_soc_abbr = umlssocabbrmap.get(d2);
406
                int d2_soc_idx = 0;
407
                if (soc_idx.keySet().contains(d2_soc)) {
408
                    d2_soc_idx = soc_idx.get(d2_soc);
409
                } else {
410
                    d2_soc_idx = soc_idx.size();
411
                }
412
                
413
                double conf = commnet.getNet().get(i, k) ;
414
                bw.write(d1 + "|" + d1_name + "|"+ d1_soc + "|" + d1_soc_abbr + "|" + d1_soc_idx + "|" 
415
                + d2 + "|" + d2_name + "|"+ d2_soc + "|" + d2_soc_abbr + "|" + d2_soc_idx + "|" + conf + "\n");
416
            }
417
        }
418
        bw.close();
419
    }
420
    
421
422
    
423
    /**
424
     * Write disease node (UMLS) to a txt file
425
     * @param filename a txt file to be written
426
     * @throws IOException
427
     */
428
    private static void writeTerms(String filename) throws IOException {
429
        BufferedWriter bw = new BufferedWriter(new FileWriter(new File(filename)));
430
        for (String dis : disease_index.keySet()) {
431
            bw.write(dis + "\n");
432
        }
433
        bw.close();
434
    }
435
    
436
    /**
437
     * Write degree for all disease nodes to a file
438
     * @param filename a file to be written
439
     * @throws IOException
440
     */
441
    public static void writeNodeDeg(DisGraph commnet, String filename) throws IOException {
442
        BufferedWriter bw = new BufferedWriter(new FileWriter(new File(filename)));
443
        Map <Integer, Integer> dis_neibor = getDegDistri(commnet);
444
        
445
        bw.write("disease" + "|" + "numofdeg" + "index" + "\n");;
446
        for (int k : dis_neibor.keySet()) {
447
            String dis = idNameConverter(index_disease.get(k), idnamemap);
448
            int deg = dis_neibor.get(k);
449
            bw.write(dis + "|" + deg + "|" + k + "\n");
450
        }
451
        bw.close();
452
    }
453
454
    /**
455
     * Converts a disease UMLS ID to a disease concept name
456
     * @param id a UMLS ID
457
     * @param idnamemap a Map from UMLS to name
458
     * @return a disease name
459
     */
460
    private static String idNameConverter(String id, Map<String, String> idnamemap) {
461
        return idnamemap.get(id);
462
    }
463
464
    /**
465
     * Creates a map from UMLS to disease concept name
466
     * @param mapfile a file containing map information from UMLS to disease concept name
467
     * @throws IOException
468
     */
469
    public static void createIdNameMap(String mapfile) throws IOException {
470
        
471
        BufferedReader br = new BufferedReader(new FileReader(mapfile));
472
        String line = null;
473
        while((line = br.readLine()) != null){
474
            String[] parts = line.split("\\|");
475
            idnamemap.put(parts[0], parts[1].toLowerCase());
476
            nameidmap.put(parts[1].toLowerCase(), parts[0]);
477
        }
478
        br.close();
479
    }
480
    
481
            
482
    /*
483
     * main method  
484
     */
485
    
486
    public static void main(String[] args) throws IOException {
487
        /* Create disease comorbidity network
488
         * Write network to a file
489
         */
490
//      **************** rule file *************************
491
        
492
        String rulefile = "./data/ID_indications_all_clean_width_umls_id_diso_filtered_sp_lift_1_0.000002_3_processed.txt";
493
494
//      **************** UMLS id to disease name map file file *************************
495
496
        String mapfile = "./data/umls_id_name_diso";        
497
        
498
//      **************** network file to be saved *************************
499
        String netfile = "./results/fares_comm_net_conf_final_abbr.txt";
500
//      String netfile = "/Users/zhengc/workspace/FARES_final/analysis/network/DCN/fares_comm_net_lift_final_abbr.txt";
501
502
//      **************** term file to be saved *************************
503
        String termfile = "./results/fares_comm_net_term_lift_final_public.txt";    
504
505
        
506
        
507
//      ####################################################################
508
//      ------------------ Build and save network to files ----------------
509
//      ####################################################################
510
        
511
        readAssocRules(rulefile);
512
        readUMLSSOC("/Users/zhengc/workspace/FARES_final/analysis/Cluster/umls_soc.txt");
513
514
        DisGraph commnet = createCommNet(rulefile);
515
516
        //      System.out.println(commnet.getNet());
517
        System.out.println("Nodes: " + commnet.getNodes());
518
        System.out.println("Edges: " + commnet.getEdges());
519
        
520
521
        createIdNameMap(mapfile);
522
        
523
        writeCommNet(commnet, netfile);
524
        writeTerms(termfile);
525
526
    }
527
}   
528
529