Switch to side-by-side view

--- a
+++ b/graph_algorithm/DeNovoPred.java
@@ -0,0 +1,217 @@
+package GraphAlgorithm;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import network.CommGeneticsLoader;
+import network.CommLoader;
+import network.DisGraph;
+import network.SparseVector;
+import util.DCNOMIMUMLSIDmap;
+
+public class DeNovoPred {
+	
+	private Map<String, Integer> generank = new HashMap<>();
+	
+	/**
+	 * Initializes a new DovovoPrediction object
+	 * @param cgg a CommGeneticsGraph
+	 * @param dis the disease whose genes will be de novo predicted 	
+	 * @throws IOException if dis doesn't exist
+	 */
+	public DeNovoPred(DisGraph cgg, String dis, double p) throws IOException{
+		// check if the disease is in disease comorbidity network
+		String dis_id = null;
+		if (DCNOMIMUMLSIDmap.dcnnameidmap.containsKey(dis)) {
+			dis_id = DCNOMIMUMLSIDmap.dcnnameidmap.get(dis);
+		} else {
+			System.out.println("No such disease in this network!");
+		}
+		// Critical part for removing all disease gene edges
+		 int dis_idx = CommGeneticsLoader.entry_index.get(dis_id); // for CommGeneticsloader from netfile
+		// int dis_idx = CommLoader.disease_index.get(dis_id); // for CommGeneticsloader from net
+		System.out.printf("Disease: %s\n", dis);
+		System.out.printf("Disease index: %s\n", dis_idx);
+		System.out.printf("Disease id: %s\n", dis_id);
+		System.out.println("Disease_gene edges before removing: " + cgg.getNet().getNeibor(dis_idx));
+		cgg.removeEdges(dis_idx); // key code for do novo prediction
+		System.out.println("Disease_gene edges after removing: " + cgg.getNet().getNeibor(dis_idx));
+		
+		// Random walk to rank disease genes
+		List<String> dislist = new ArrayList<>();
+		dislist.add(dis_id);
+		SparseVector seed = RandomWalk.createSeedVector(dislist);
+		RandomWalk rm = new RandomWalk(cgg, seed, p);
+		generank = rm.getRWRank();
+	}
+	
+	/**
+	 * Get de novo prediction gene ranks for specific disease
+	 * @return de novo prediction gene ranks for specific disease
+	 */
+	public Map<String, Integer> getDoNovo() {
+		return generank;
+	}
+	
+	/**
+	 * Write gene-rank pairs into a CSV file, format is "Gene,Rank,Percentage"
+	 * @param dn de novo prediction map
+	 * @param dnfile a file to be written
+	 * @throws IOException
+	 */
+	public void saveDoNovo(Map<String, Integer> dn, String dnfile) throws IOException {
+		BufferedWriter bw = new BufferedWriter(new FileWriter(new File(dnfile)));
+		bw.write("Gene" + ","  + "Rank" + "," + "Percentage" +  "\n");
+		for (String gene : dn.keySet()) {
+//			System.out.println(sortedgenemap.get(s));
+			int rank = dn.get(gene);
+			double perc = rank * 1.0 / CommGeneticsLoader.num_gene * 100;
+			bw.write(gene + "," + rank + "," + perc + "\n");
+		}
+		bw.close();
+	}
+	
+	public static void main(String[] args) throws IOException {
+		
+		// step 1: build DCN-PPI network
+		System.out.println("Building DCN network...");
+		
+//		String commnetfile = args[0];
+//		String ppifile = args[1];
+//		String disgenefile = args[2];
+		
+		String commnetfile = "./data/fares_comm_net_lift_final_abbr.txt";
+		String ppifile = "./data/gene_gene_string_cut.txt";
+		String disgenefile = "./data/OMIM_disease_gene_umls_id_diso";
+		
+		
+//		String dcnmapfile = args[3];
+//		String omimmapfile = args[4];
+		
+		String dcnmapfile = "./data/term_umls_id_diso";
+		String omimmapfile = "./data/OMIM_umls_id_diso";
+		util.DCNOMIMUMLSIDmap.createDCNIdNameMap(dcnmapfile);
+		util.DCNOMIMUMLSIDmap.createOMIMIdNameMap(omimmapfile);
+		String DCN_dis = "dementia";
+		String OMIM_pat = "alzheimer";
+		DCNOMIMUMLSIDmap domim = new DCNOMIMUMLSIDmap(DCN_dis, OMIM_pat);
+		Map<String, List<String>> dcn_omim = domim.getDCNOMIMUMLSIDmap();
+		
+		DisGraph cgg = null;
+		try {
+			cgg = CommGeneticsLoader.createCommGeneticsGraph(commnetfile, ppifile, disgenefile, dcn_omim);
+		} catch (IOException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		}
+		
+		System.out.println("Done!");
+		
+		
+		// step 2: optimize restart probability
+//		String rulefile = args[5];
+//		String donovoDir = args[6]
+//		String predType = args[7]
+				
+		String rulefile = "./data/ID_indications_all_clean_width_umls_id_diso_filtered_sp_lift_1_0.000002_3_processed.txt";
+		String donovoDir = "./results/evaluation/";
+		String predType = "pred" //"optimize", "random" or "pred"
+		
+		if (preType=="optimize") {
+			/**
+			 * do novo prediction for different restart probabilities
+			 */
+			System.out.println("Starting do novo gene prediction for " + DCN_dis + "!");
+			List<Double> ps = new ArrayList<Double>();
+			for(int i=1; i<10; i++){
+				ps.add(i * 0.1);
+			}
+			
+			int count = 1;
+			for(double p : ps){
+				System.out.println("\nTesting " + count + " of " + ps.size());
+				DeNovoPred dnp = new DeNovoPred(cgg, DCN_dis, p);
+				Map<String, Integer> generank = dnp.getDoNovo();
+				String donovofile = donovoDir + "commnet_opti_" + P + DCN_dis  + ".csv";
+				dnp.saveDoNovo(generank, donovofile);
+				count++;
+			}
+		}
+		
+		// step 3: get gene ranks for randomized network
+		if (preType=="random") {
+//	 		#########################################################################
+//			----------------------- Method: Erdos-Renyi graph ---------------------------------
+//			#########################################################################
+			System.out.println("\nStaring random node do novo prediction......\n");
+		
+//			/*
+//			 * Build the bipartite network
+	
+			CommLoader.readAssocRules(rulefile);
+			DisGraph commnet = CommLoader.createCommNet(rulefile);
+			int V = commnet.getNodes();
+			double p = (double) V / (V*(V-1)/2);
+			System.out.println("Probability: " + p);
+			
+			CommLoader.readPPI(ppifile);
+			DisGraph ppinet = CommLoader.createPPINet(ppifile);
+			
+			List<String> entry_list = new ArrayList<String>();
+			List<String> disease_list = new ArrayList<String>();
+			disease_list = util.CollectionsEx.setToList(CommLoader.disease_set);
+			entry_list.addAll(disease_list);
+			entry_list.addAll(CommLoader.gene_list);
+			CommGeneticsLoader.createIndex(entry_list);
+			CommGeneticsLoader.num_disease = disease_list.size();
+			CommGeneticsLoader.entry_list = entry_list;
+			CommGeneticsLoader.num_gene = CommLoader.gene_list.size();
+			
+			
+			Map<Integer, List<Integer>> dis_gene = CommLoader.readDisGene(disgenefile, dcn_omim);
+			
+			int N = 1000;
+			double P = 0.5;
+			
+			for (int i=0; i<N; i++) {
+				System.out.println();
+				if (i > 10 && i % 100 == 0) {
+					System.out.printf("Finish %d of %d prediction!", i, N);
+				}
+				
+				DisGraph randnet = GraphGenerator.simple(V, p);
+				System.out.println("Total nodes in randamized disease graph: " + randnet.getNodes());
+				System.out.println("Total edges in randomized disease graph: " + randnet.getEdges());
+				DisGraph cgg = CommGeneticsLoader.createCommGeneticsGraph(randnet, ppinet, dis_gene); 
+				System.out.println("Total nodes in randamized heterogeneous graph: " + cgg.getNodes());
+				System.out.println("Total edges in randomized heterogeneous graph: " + cgg.getEdges());
+				DeNovoPred dnp = new DeNovoPred(cgg, DCN_dis, P); //!speed limit operation
+				Map<String, Integer> generank = dnp.getDoNovo();
+				
+				System.out.println("Size of gene rank:" + generank.size());
+				
+				
+				String donovofile = donovoDir + "random_er_" + P + "_" + i + "_" + DCN_dis  + ".csv";
+				dnp.saveDoNovo(generank, donovofile);
+
+			}
+		}
+		
+		//Step 4: get gene rank from DCN
+		if (preType=="pred") {
+			DeNovoPred dnp = new DeNovoPred(cgg, DCN_dis, 0.5); 
+			Map<String, Integer> generank = dnp.getDoNovo();
+			
+			System.out.println("Size of gene rank:" + generank.size());
+			String donovofile = donovoDir + "commnet_" + 0.5 + "_" + DCN_dis  + ".csv";
+			dnp.saveDoNovo(generank, donovofile);
+		}
+		
+	}
+
+}