--- a +++ b/graph_algorithm/DeNovoPred.java @@ -0,0 +1,217 @@ +package GraphAlgorithm; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import network.CommGeneticsLoader; +import network.CommLoader; +import network.DisGraph; +import network.SparseVector; +import util.DCNOMIMUMLSIDmap; + +public class DeNovoPred { + + private Map<String, Integer> generank = new HashMap<>(); + + /** + * Initializes a new DovovoPrediction object + * @param cgg a CommGeneticsGraph + * @param dis the disease whose genes will be de novo predicted + * @throws IOException if dis doesn't exist + */ + public DeNovoPred(DisGraph cgg, String dis, double p) throws IOException{ + // check if the disease is in disease comorbidity network + String dis_id = null; + if (DCNOMIMUMLSIDmap.dcnnameidmap.containsKey(dis)) { + dis_id = DCNOMIMUMLSIDmap.dcnnameidmap.get(dis); + } else { + System.out.println("No such disease in this network!"); + } + // Critical part for removing all disease gene edges + int dis_idx = CommGeneticsLoader.entry_index.get(dis_id); // for CommGeneticsloader from netfile + // int dis_idx = CommLoader.disease_index.get(dis_id); // for CommGeneticsloader from net + System.out.printf("Disease: %s\n", dis); + System.out.printf("Disease index: %s\n", dis_idx); + System.out.printf("Disease id: %s\n", dis_id); + System.out.println("Disease_gene edges before removing: " + cgg.getNet().getNeibor(dis_idx)); + cgg.removeEdges(dis_idx); // key code for do novo prediction + System.out.println("Disease_gene edges after removing: " + cgg.getNet().getNeibor(dis_idx)); + + // Random walk to rank disease genes + List<String> dislist = new ArrayList<>(); + dislist.add(dis_id); + SparseVector seed = RandomWalk.createSeedVector(dislist); + RandomWalk rm = new RandomWalk(cgg, seed, p); + generank = rm.getRWRank(); + } + + /** + * Get de novo prediction gene ranks for specific disease + * @return de novo prediction gene ranks for specific disease + */ + public Map<String, Integer> getDoNovo() { + return generank; + } + + /** + * Write gene-rank pairs into a CSV file, format is "Gene,Rank,Percentage" + * @param dn de novo prediction map + * @param dnfile a file to be written + * @throws IOException + */ + public void saveDoNovo(Map<String, Integer> dn, String dnfile) throws IOException { + BufferedWriter bw = new BufferedWriter(new FileWriter(new File(dnfile))); + bw.write("Gene" + "," + "Rank" + "," + "Percentage" + "\n"); + for (String gene : dn.keySet()) { +// System.out.println(sortedgenemap.get(s)); + int rank = dn.get(gene); + double perc = rank * 1.0 / CommGeneticsLoader.num_gene * 100; + bw.write(gene + "," + rank + "," + perc + "\n"); + } + bw.close(); + } + + public static void main(String[] args) throws IOException { + + // step 1: build DCN-PPI network + System.out.println("Building DCN network..."); + +// String commnetfile = args[0]; +// String ppifile = args[1]; +// String disgenefile = args[2]; + + String commnetfile = "./data/fares_comm_net_lift_final_abbr.txt"; + String ppifile = "./data/gene_gene_string_cut.txt"; + String disgenefile = "./data/OMIM_disease_gene_umls_id_diso"; + + +// String dcnmapfile = args[3]; +// String omimmapfile = args[4]; + + String dcnmapfile = "./data/term_umls_id_diso"; + String omimmapfile = "./data/OMIM_umls_id_diso"; + util.DCNOMIMUMLSIDmap.createDCNIdNameMap(dcnmapfile); + util.DCNOMIMUMLSIDmap.createOMIMIdNameMap(omimmapfile); + String DCN_dis = "dementia"; + String OMIM_pat = "alzheimer"; + DCNOMIMUMLSIDmap domim = new DCNOMIMUMLSIDmap(DCN_dis, OMIM_pat); + Map<String, List<String>> dcn_omim = domim.getDCNOMIMUMLSIDmap(); + + DisGraph cgg = null; + try { + cgg = CommGeneticsLoader.createCommGeneticsGraph(commnetfile, ppifile, disgenefile, dcn_omim); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + System.out.println("Done!"); + + + // step 2: optimize restart probability +// String rulefile = args[5]; +// String donovoDir = args[6] +// String predType = args[7] + + String rulefile = "./data/ID_indications_all_clean_width_umls_id_diso_filtered_sp_lift_1_0.000002_3_processed.txt"; + String donovoDir = "./results/evaluation/"; + String predType = "pred" //"optimize", "random" or "pred" + + if (preType=="optimize") { + /** + * do novo prediction for different restart probabilities + */ + System.out.println("Starting do novo gene prediction for " + DCN_dis + "!"); + List<Double> ps = new ArrayList<Double>(); + for(int i=1; i<10; i++){ + ps.add(i * 0.1); + } + + int count = 1; + for(double p : ps){ + System.out.println("\nTesting " + count + " of " + ps.size()); + DeNovoPred dnp = new DeNovoPred(cgg, DCN_dis, p); + Map<String, Integer> generank = dnp.getDoNovo(); + String donovofile = donovoDir + "commnet_opti_" + P + DCN_dis + ".csv"; + dnp.saveDoNovo(generank, donovofile); + count++; + } + } + + // step 3: get gene ranks for randomized network + if (preType=="random") { +// ######################################################################### +// ----------------------- Method: Erdos-Renyi graph --------------------------------- +// ######################################################################### + System.out.println("\nStaring random node do novo prediction......\n"); + +// /* +// * Build the bipartite network + + CommLoader.readAssocRules(rulefile); + DisGraph commnet = CommLoader.createCommNet(rulefile); + int V = commnet.getNodes(); + double p = (double) V / (V*(V-1)/2); + System.out.println("Probability: " + p); + + CommLoader.readPPI(ppifile); + DisGraph ppinet = CommLoader.createPPINet(ppifile); + + List<String> entry_list = new ArrayList<String>(); + List<String> disease_list = new ArrayList<String>(); + disease_list = util.CollectionsEx.setToList(CommLoader.disease_set); + entry_list.addAll(disease_list); + entry_list.addAll(CommLoader.gene_list); + CommGeneticsLoader.createIndex(entry_list); + CommGeneticsLoader.num_disease = disease_list.size(); + CommGeneticsLoader.entry_list = entry_list; + CommGeneticsLoader.num_gene = CommLoader.gene_list.size(); + + + Map<Integer, List<Integer>> dis_gene = CommLoader.readDisGene(disgenefile, dcn_omim); + + int N = 1000; + double P = 0.5; + + for (int i=0; i<N; i++) { + System.out.println(); + if (i > 10 && i % 100 == 0) { + System.out.printf("Finish %d of %d prediction!", i, N); + } + + DisGraph randnet = GraphGenerator.simple(V, p); + System.out.println("Total nodes in randamized disease graph: " + randnet.getNodes()); + System.out.println("Total edges in randomized disease graph: " + randnet.getEdges()); + DisGraph cgg = CommGeneticsLoader.createCommGeneticsGraph(randnet, ppinet, dis_gene); + System.out.println("Total nodes in randamized heterogeneous graph: " + cgg.getNodes()); + System.out.println("Total edges in randomized heterogeneous graph: " + cgg.getEdges()); + DeNovoPred dnp = new DeNovoPred(cgg, DCN_dis, P); //!speed limit operation + Map<String, Integer> generank = dnp.getDoNovo(); + + System.out.println("Size of gene rank:" + generank.size()); + + + String donovofile = donovoDir + "random_er_" + P + "_" + i + "_" + DCN_dis + ".csv"; + dnp.saveDoNovo(generank, donovofile); + + } + } + + //Step 4: get gene rank from DCN + if (preType=="pred") { + DeNovoPred dnp = new DeNovoPred(cgg, DCN_dis, 0.5); + Map<String, Integer> generank = dnp.getDoNovo(); + + System.out.println("Size of gene rank:" + generank.size()); + String donovofile = donovoDir + "commnet_" + 0.5 + "_" + DCN_dis + ".csv"; + dnp.saveDoNovo(generank, donovofile); + } + + } + +}