Diff of /network/CommLoader.java [000000] .. [4fba4e]

Switch to side-by-side view

--- a
+++ b/network/CommLoader.java
@@ -0,0 +1,529 @@
+package network;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import GraphAlgorithm.GraphGenerator;
+import GraphAlgorithm.RandGraph;
+
+/**
+ * Provides methods for building a disease comorbidity network (DCN) from FAERS
+ * and saving it to a file.
+ * 
+ * @author zhengc
+ *
+ */
+public class CommLoader {
+	// ---- Disease (DCN node) lookups, filled by readAssocRules ----
+	/** Distinct trimmed tokens taken from the first and third '|' fields of the rule file. */
+	public static HashSet<String> disease_set = new HashSet<>();
+	/** Disease token -> node index. */
+	public static HashMap<String, Integer> disease_index = new HashMap<>();
+	/** Node index -> disease token (inverse of disease_index). */
+	public static HashMap<Integer, String> index_disease = new HashMap<>();
+
+	// ---- ID/name lookups, filled by createIdNameMap ----
+	/** First column of the map file -> lower-cased second column. */
+	public static Map<String, String> idnamemap = new HashMap<>();
+	/** Lower-cased second column of the map file -> first column. */
+	public static Map<String, String> nameidmap = new HashMap<>();
+
+	// ---- SOC lookups, filled by readUMLSSOC ----
+	/** Distinct SOC values seen in the UMLS/SOC file. */
+	public static HashSet<String> soc_set = new HashSet<>();
+	/** UMLS ID -> SOC. */
+	public static Map<String, String> umlssocmap = new HashMap<>();
+	/** UMLS ID -> SOC abbreviation. */
+	public static Map<String, String> umlssocabbrmap = new HashMap<>();
+	/** SOC -> index. */
+	public static Map<String, Integer> soc_idx = new HashMap<>();
+	/** Index -> SOC (inverse of soc_idx). */
+	public static Map<Integer, String> idx_soc = new HashMap<>();
+
+	/** Not referenced by any method of this class; NOTE(review): meaning not evident from here. */
+	public static final double P = 0.85;
+
+	// ---- Gene lookups, filled by readPPI ----
+	/** Distinct values of the first '|' field of the PPI file. */
+	public static HashSet<String> gene_set = new HashSet<>();
+	/** gene_set converted to a list by util.CollectionsEx.setToList. */
+	public static List<String> gene_list = new ArrayList<>();
+	/** Gene -> index. */
+	public static HashMap<String, Integer> gene_index = new HashMap<>();
+	/** Index -> gene (inverse of gene_index). */
+	public static HashMap<Integer, String> index_gene = new HashMap<>();
+	
+	
+	
+	/**
+	 * Reads an association rule file and fills the disease lookup tables needed
+	 * for building the DCN.
+	 * <p>
+	 * The first line of the file is skipped. On every following line the characters
+	 * '[' and ']' are removed, the line is split on '|', and the comma-separated,
+	 * trimmed tokens of the first and third fields are added to disease_set.
+	 * Afterwards every member of disease_set is assigned a consecutive index in
+	 * disease_index / index_disease (in the set's iteration order).
+	 *
+	 * @param filename an association rule file
+	 * @throws IOException if the file cannot be opened or read
+	 */
+	public static void readAssocRules(String filename) throws IOException{
+		// try-with-resources: the reader is closed even if parsing a line throws
+		try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) {
+			br.readLine(); // skip the first (header) line
+			String line;
+			while ((line = br.readLine()) != null) {
+				// strip the brackets and split the rule once, then reuse the fields
+				String[] fields = line.replace("[", "").replace("]", "").split("\\|");
+				String[] symptoms = fields[0].split(",");
+				String[] diseases = fields[2].split(",");
+
+				for (String symptom : symptoms) {
+					disease_set.add(symptom.trim());
+				}
+				for (String disease : diseases) {
+					disease_set.add(disease.trim());
+				}
+			}
+		}
+
+		// number the collected terms consecutively, keeping both directions of the mapping
+		int m = 0;
+		for (String d : disease_set) {
+			disease_index.put(d, m);
+			index_disease.put(m, d);
+			m++;
+		}
+	}
+	
+	/**
+	 * Reads a '$'-separated UMLS/SOC file and fills the SOC lookup tables.
+	 * <p>
+	 * For every line, field 0 is taken as the UMLS ID, field 8 as the SOC and
+	 * field 9 as the SOC abbreviation. Afterwards every distinct SOC is assigned a
+	 * consecutive index in soc_idx / idx_soc (in the set's iteration order).
+	 *
+	 * @param filename a '$'-separated file with at least 10 fields per line
+	 * @throws IOException if the file cannot be opened or read
+	 */
+	public static void readUMLSSOC(String filename) throws IOException{
+		// try-with-resources: the reader is closed even if parsing a line throws
+		try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) {
+			String line;
+			while ((line = br.readLine()) != null) {
+				String[] parts = line.split("\\$");
+				String umls = parts[0];
+				String soc = parts[8];
+				String soc_abbr = parts[9];
+				soc_set.add(soc);
+				umlssocmap.put(umls, soc);
+				umlssocabbrmap.put(umls, soc_abbr);
+			}
+		}
+
+		int m = 0;
+		for (String soc : soc_set) {
+			soc_idx.put(soc, m);
+			idx_soc.put(m, soc);
+			m++;
+		}
+	}
+	
+	
+	/**
+	 * Reads a '|'-separated file and fills the gene lookup tables.
+	 * <p>
+	 * The first field of every line (including the first line of the file) is
+	 * added to gene_set; gene_list is rebuilt from the set and every gene is
+	 * assigned a consecutive index in gene_index / index_gene.
+	 * <p>
+	 * NOTE(review): only the first column is collected here and no line is skipped,
+	 * whereas createPPINet skips the first line and looks up both columns - confirm
+	 * that the input lists every gene in the first column.
+	 *
+	 * @param filename a '|'-separated file whose first field is a gene
+	 * @throws IOException if the file cannot be opened or read
+	 */
+	public static void readPPI(String filename) throws IOException{
+		// try-with-resources: the reader is closed even if reading throws
+		try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) {
+			String line;
+			while ((line = br.readLine()) != null) {
+				gene_set.add(line.split("\\|")[0]);
+			}
+		}
+		gene_list = util.CollectionsEx.setToList(gene_set);
+
+		int m = 0;
+		for (String d : gene_set) {
+			gene_index.put(d, m);
+			index_gene.put(m, d);
+			m++;
+		}
+	}
+	
+	
+	/**
+	 * Builds a graph over the genes indexed in gene_index from a '|'-separated
+	 * edge file.
+	 * <p>
+	 * The first line of the file is skipped. On every following line the first two
+	 * fields are looked up in gene_index and an edge is added between the two
+	 * indices. gene_index must already be populated (see readPPI).
+	 *
+	 * @param ppifile a '|'-separated file with one gene pair per line
+	 * @return a DisGraph with gene_index.size() nodes
+	 * @throws IOException if the file cannot be opened or read
+	 * @throws IllegalStateException if a gene on a line is not present in gene_index
+	 */
+	public static DisGraph createPPINet(String ppifile) throws IOException {
+		// try-with-resources: the reader is closed even if a line is malformed
+		try (BufferedReader br = new BufferedReader(new FileReader(new File(ppifile)))) {
+			DisGraph ppinet = new DisGraph(gene_index.size());
+
+			br.readLine(); // skip the first (header) line
+			String line;
+			while ((line = br.readLine()) != null) {
+				String[] parts = line.split("\\|");
+				Integer g1 = gene_index.get(parts[0]);
+				Integer g2 = gene_index.get(parts[1]);
+				// fail with the offending line instead of an anonymous unboxing NPE
+				if (g1 == null || g2 == null) {
+					throw new IllegalStateException(
+							"Gene not found in gene_index for PPI line: " + line);
+				}
+				int d1 = g1;
+				int d2 = g2;
+				ppinet.addEdge(d1, d2);
+			}
+			return ppinet;
+		}
+	}
+	
+	
+	/**
+	 * Reads a '|'-separated disease-gene file and groups gene indices by disease
+	 * index.
+	 * <p>
+	 * For every line, field 0 is the disease ID and field 1 the gene. Lines whose
+	 * gene is not in gene_index contribute nothing. If the disease ID is itself in
+	 * disease_index the gene is recorded under that disease. Otherwise the ID is
+	 * searched in the value lists of dcn_omim, and the gene is recorded under every
+	 * key whose list contains it and which is present in disease_index.
+	 * <p>
+	 * A gene may appear more than once in a disease's list if the file repeats it.
+	 *
+	 * @param disgenefile a '|'-separated file of disease ID and gene
+	 * @param dcn_omim map from a DCN disease ID to the alternative IDs that stand for it
+	 * @return map from disease index to the list of associated gene indices
+	 * @throws IOException if the file cannot be opened or read
+	 */
+	public static Map<Integer, List<Integer>> readDisGene(String disgenefile, 
+			Map<String, List<String>> dcn_omim) throws IOException{
+
+		Map<Integer, List<Integer>> dis_gene = new HashMap<>();
+		// try-with-resources: the reader is closed even if a line is malformed
+		try (BufferedReader br = new BufferedReader(new FileReader(new File(disgenefile)))) {
+			String line;
+			while ((line = br.readLine()) != null) {
+				String[] s = line.split("\\|");
+				String dis = s[0];
+				String gene = s[1];
+
+				Integer gene_idx = gene_index.get(gene);
+				if (gene_idx == null) {
+					continue; // gene is not indexed: nothing to record for this line
+				}
+
+				if (disease_index.containsKey(dis)) {
+					addDiseaseGene(dis_gene, disease_index.get(dis), gene_idx);
+				} else {
+					// Keep 'dis' unchanged while scanning: every DCN id must be tested
+					// against the ID read from the file, not against a previous match.
+					for (Map.Entry<String, List<String>> entry : dcn_omim.entrySet()) {
+						String dcn_id = entry.getKey();
+						if (entry.getValue().contains(dis) && disease_index.containsKey(dcn_id)) {
+							addDiseaseGene(dis_gene, disease_index.get(dcn_id), gene_idx);
+						}
+					}
+				}
+			}
+		}
+		return dis_gene;
+	}
+
+	/** Appends gene_idx to the gene list of dis_idx, creating the list on first use. */
+	private static void addDiseaseGene(Map<Integer, List<Integer>> dis_gene, int dis_idx, int gene_idx) {
+		List<Integer> genes = dis_gene.get(dis_idx);
+		if (genes == null) {
+			genes = new ArrayList<Integer>();
+			dis_gene.put(dis_idx, genes);
+		}
+		genes.add(gene_idx);
+	}
+	
+	/**
+	 * Builds a DCN from an association rule file.
+	 * <p>
+	 * The first line of the file is skipped. On every following line '[' and ']'
+	 * are removed, the line is split on '|', and the comma-separated, trimmed
+	 * tokens of the first and third fields are pooled; an edge is added between
+	 * every pair of pooled tokens. disease_index must already contain every token
+	 * (see readAssocRules).
+	 *
+	 * @param filename an association rule file
+	 * @return an undirected and unweighted DisGraph with disease_index.size() nodes
+	 * @throws IOException if the file cannot be opened or read
+	 * @throws IllegalStateException if a token of the file is not present in disease_index
+	 */
+	public static DisGraph createCommNet(String filename) throws IOException{
+		// try-with-resources: the reader is closed even if a line is malformed
+		try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) {
+			DisGraph commnet = new DisGraph(disease_index.size());
+
+			br.readLine(); // skip the first (header) line
+			String line;
+			while ((line = br.readLine()) != null) {
+				// strip the brackets and split the rule once, then reuse the fields
+				String[] fields = line.replace("[", "").replace("]", "").split("\\|");
+
+				List<String> diseases = new ArrayList<String>();
+				for (String d : fields[0].split(",")) {
+					diseases.add(d.trim());
+				}
+				for (String d : fields[2].split(",")) {
+					diseases.add(d.trim());
+				}
+
+				// resolve every token once; fail with the token instead of an unboxing NPE
+				int[] idx = new int[diseases.size()];
+				for (int i = 0; i < idx.length; i++) {
+					Integer k = disease_index.get(diseases.get(i));
+					if (k == null) {
+						throw new IllegalStateException(
+								"Disease not found in disease_index: " + diseases.get(i));
+					}
+					idx[i] = k;
+				}
+
+				// undirected, unweighted network: connect every pair on this rule
+				for (int i = 0; i < idx.length - 1; i++) {
+					for (int j = i + 1; j < idx.length; j++) {
+						commnet.addEdge(idx[i], idx[j]);
+					}
+				}
+			}
+			return commnet;
+		}
+	}
+	
+	
+	/**
+	 * Builds the DCN from a rule file and returns the graph that RandGraph
+	 * produces for it.
+	 *
+	 * @param filename an association rule file, as accepted by createCommNet
+	 * @return the result of RandGraph.getRandGraph() for the DCN
+	 * @throws IOException if the rule file cannot be read
+	 */
+	public static DisGraph createRandNet(String filename) throws IOException {
+		RandGraph randomizer = new RandGraph(createCommNet(filename));
+		return randomizer.getRandGraph();
+	}
+	
+	
+	/**
+	 * Builds the DCN from a rule file and generates a random graph with the same
+	 * number of nodes via GraphGenerator.simple(V, p).
+	 * <p>
+	 * The edge probability is p = V / (V*(V-1)/2), i.e. the node count divided by
+	 * the number of unordered node pairs, limited to at most 1; it is 0 when the
+	 * graph has fewer than two nodes.
+	 * <p>
+	 * NOTE(review): p is derived from the node count; if the intent is to match the
+	 * edge density of the DCN, commnet.getEdges() would be the numerator - confirm.
+	 *
+	 * @param filename an association rule file, as accepted by createCommNet
+	 * @return the generated random graph
+	 * @throws IOException if the rule file cannot be read
+	 */
+	public static DisGraph createERNet(String filename) throws IOException {
+		DisGraph commnet = createCommNet(filename);
+		int V = commnet.getNodes();
+		// Count the node pairs in floating point: the int expression V*(V-1)/2
+		// overflows for V > 46341 and is 0 for V < 2 (which yields Infinity/NaN).
+		double pairs = V * (V - 1.0) / 2.0;
+		double p = (pairs > 0) ? Math.min(1.0, V / pairs) : 0.0;
+		System.out.println("Probability: " + p);
+		return GraphGenerator.simple(V, p);
+	}
+	
+	/**
+	 * Builds a graph from a '|'-separated adjacency matrix file.
+	 * <p>
+	 * netfile is parsed with createCommNet only to obtain the number of nodes.
+	 * Line r of adjfile (0-based) is row r of the matrix; every entry equal to 1
+	 * in column c adds an edge (r, c).
+	 *
+	 * @param netfile an association rule file, as accepted by createCommNet
+	 * @param adjfile a file with one '|'-separated row of integers per line
+	 * @return a DisGraph with as many nodes as the DCN built from netfile
+	 * @throws IOException if either file cannot be read
+	 * @throws NumberFormatException if an entry of adjfile is not an integer
+	 */
+	public static DisGraph createCommNetFromAdjFile(String netfile, String adjfile) throws IOException {
+		DisGraph commnet = createCommNet(netfile);
+		DisGraph adjnet = new DisGraph(commnet.getNodes());
+
+		// try-with-resources: the reader is closed even if an entry fails to parse
+		try (BufferedReader br = new BufferedReader(new FileReader(new File(adjfile)))) {
+			String line;
+			int num_line = 0;
+			while ((line = br.readLine()) != null) {
+				String[] parts = line.split("\\|");
+				for (int i = 0; i < parts.length; i++) {
+					// tolerate blanks around the separators
+					if (Integer.parseInt(parts[i].trim()) == 1) {
+						adjnet.addEdge(num_line, i);
+					}
+				}
+				num_line++;
+			}
+		}
+		return adjnet;
+	}
+	
+	
+	/**
+	 * Returns the sorted names of the neighbours of a disease in a graph.
+	 * <p>
+	 * The name is resolved to an ID through nameidmap (first as given, then
+	 * lower-cased, because createIdNameMap stores names lower-cased) and the ID to
+	 * a node index through disease_index. Each neighbour index is translated back
+	 * through index_disease and idnamemap; a neighbour without a name entry is
+	 * reported by its ID.
+	 *
+	 * @param dis a disease name
+	 * @param dg the graph to query
+	 * @return the neighbour names in natural String order
+	 * @throws IllegalArgumentException if the name cannot be resolved to a node index
+	 */
+	public static List<String> getNeibors(String dis, DisGraph dg) {
+
+		String dis_id = CommLoader.nameidmap.get(dis);
+		if (dis_id == null && dis != null) {
+			// names are stored lower-cased, so retry the lookup case-insensitively
+			dis_id = CommLoader.nameidmap.get(dis.toLowerCase());
+		}
+		Integer node = (dis_id == null) ? null : CommLoader.disease_index.get(dis_id);
+		if (node == null) {
+			// explicit error instead of an anonymous unboxing NullPointerException
+			throw new IllegalArgumentException("Unknown disease: " + dis);
+		}
+		int dis_idx = node;
+
+		SparseVector sv = dg.getNeibor(dis_idx);
+		List<String> neibors = new ArrayList<>();
+		for (int j : sv.getKeys()) {
+			String id = CommLoader.index_disease.get(j);
+			String name = CommLoader.idnamemap.get(id);
+			// a null entry would make Collections.sort throw; fall back to the ID
+			neibors.add(name != null ? name : id);
+		}
+		Collections.sort(neibors);
+		return neibors;
+	}
+	
+	/**
+	 * Collects, for every row index of the graph's matrix, the nnz() value of that
+	 * row's neighbour vector.
+	 *
+	 * @param dg the graph to inspect
+	 * @return map from node index to nnz() of its neighbour vector
+	 */
+	public static Map <Integer, Integer> getDegDistri(DisGraph dg) {
+		Map<Integer, Integer> degrees = new HashMap<>();
+		SparseMatrix net = dg.getNet();
+		int node = 0;
+		while (node < net.size()) {
+			int degree = net.getNeibor(node).nnz();
+			degrees.put(node, degree);
+			node++;
+		}
+		return degrees;
+	}
+	
+	/**
+	 * Prints the neighbours of disname1 in dg and of disname2 in randgraph, then
+	 * prints and returns the names that occur in both neighbour lists.
+	 *
+	 * @param dg the original graph
+	 * @param randgraph the random graph
+	 * @param disname1 disease name looked up in dg
+	 * @param disname2 disease name looked up in randgraph
+	 * @return the shared neighbour names, sorted
+	 */
+	public static List<String> compareGraph(DisGraph dg, DisGraph randgraph,  String disname1, String disname2) {
+
+		List<String> neibors_original = getNeibors(disname1, dg);
+		System.out.println("\nComorbidities of " + disname1 + " in original graph:" + neibors_original.size() + "\n");
+		for (String d : neibors_original) {
+			System.out.println(d);
+		}
+
+		List<String> neibors_random = getNeibors(disname2, randgraph);
+		System.out.println("\nComorbidities of " + disname2 + " in random graph:" + neibors_random.size() + "\n" );
+		for (String d : neibors_random) {
+			System.out.println(d);
+		}
+
+		// Walk the longer list and probe the shorter one through a hash set, so the
+		// intersection costs O(n + m) instead of O(n * m) List.contains calls.
+		boolean originalIsLonger = neibors_original.size() >= neibors_random.size();
+		List<String> longer = originalIsLonger ? neibors_original : neibors_random;
+		Set<String> shorter = new HashSet<String>(originalIsLonger ? neibors_random : neibors_original);
+
+		List<String> neibors_common = new ArrayList<>();
+		for (String name : longer) {
+			if (shorter.contains(name)) {
+				neibors_common.add(name);
+			}
+		}
+
+		Collections.sort(neibors_common);
+		// a space was missing before "and" in this message
+		System.out.println("\nShared comorbidities of " + disname1 + " and " + disname2 + 
+				" between original and random graphs :" + neibors_common.size() + "\n" );
+		for (String d : neibors_common) {
+			System.out.println(d);
+		}
+		return neibors_common;
+	}
+	
+	
+	/**
+	 * Writes a network to a '|'-separated text file, one line per stored matrix
+	 * entry (i, k), preceded by a header line. Columns:
+	 * dis1_UMLS|dis1_name|dis1_SOC|dis1_SOC_abbr|dis1_SOC_idx|dis2_UMLS|dis2_name|dis2_SOC|dis2_SOC_abbr|dis2_SOC_idx|conf
+	 * <p>
+	 * A disease whose SOC is not in soc_idx gets the index soc_idx.size(). Values
+	 * missing from the lookup maps are written as the text "null".
+	 *
+	 * @param commnet a DisGraph
+	 * @param filename a file to be written
+	 * @throws IOException if the file cannot be opened or written
+	 */
+	public static void writeCommNet(DisGraph commnet, String filename) throws IOException {
+		// try-with-resources: the writer is flushed and closed even if writing fails
+		try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(filename)))) {
+			bw.write("#dis1_UMLS|dis1_name|dis1_SOC|dis1_SOC_abbr|dis1_SOC_idx|"
+					+ "dis2_UMLS|dis2_name|dis2_SOC|dis2_SOC_abbr|dis2_SOC_idx|conf\n");
+
+			SparseMatrix net = commnet.getNet();
+			for (int i = 0; i < net.size(); i++) {
+				String d1 = index_disease.get(i);
+				String d1_name = idNameConverter(d1, idnamemap);
+				String d1_soc = umlssocmap.get(d1);
+				String d1_soc_abbr = umlssocabbrmap.get(d1);
+				int d1_soc_idx = socIndexOf(d1_soc);
+
+				Set<Integer> sv = net.getKey(i);
+				for (int k : sv) {
+					String d2 = index_disease.get(k);
+					String d2_name = idNameConverter(d2, idnamemap);
+					String d2_soc = umlssocmap.get(d2);
+					String d2_soc_abbr = umlssocabbrmap.get(d2);
+					int d2_soc_idx = socIndexOf(d2_soc);
+
+					double conf = net.get(i, k);
+					bw.write(d1 + "|" + d1_name + "|" + d1_soc + "|" + d1_soc_abbr + "|" + d1_soc_idx + "|"
+							+ d2 + "|" + d2_name + "|" + d2_soc + "|" + d2_soc_abbr + "|" + d2_soc_idx + "|" + conf + "\n");
+				}
+			}
+		}
+	}
+
+	/** Returns the index of soc in soc_idx, or soc_idx.size() when the SOC is not indexed. */
+	private static int socIndexOf(String soc) {
+		Integer idx = soc_idx.get(soc);
+		return (idx != null) ? idx : soc_idx.size();
+	}
+	
+
+	
+	/**
+	 * Writes every key of disease_index (the disease node IDs) to a text file, one
+	 * per line, in the map's iteration order.
+	 *
+	 * @param filename a txt file to be written
+	 * @throws IOException if the file cannot be opened or written
+	 */
+	private static void writeTerms(String filename) throws IOException {
+		// try-with-resources: the writer is flushed and closed even if writing fails
+		try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(filename)))) {
+			for (String dis : disease_index.keySet()) {
+				bw.write(dis + "\n");
+			}
+		}
+	}
+	
+	/**
+	 * Writes the degree of every disease node to a '|'-separated file with the
+	 * columns disease|numofdeg|index, preceded by a header line.
+	 *
+	 * @param commnet the graph whose node degrees are written
+	 * @param filename a file to be written
+	 * @throws IOException if the file cannot be opened or written
+	 */
+	public static void writeNodeDeg(DisGraph commnet, String filename) throws IOException {
+		// try-with-resources: the writer is flushed and closed even if writing fails
+		try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(filename)))) {
+			Map<Integer, Integer> dis_neibor = getDegDistri(commnet);
+
+			// the header needs a separator between every column, like the data rows
+			bw.write("disease" + "|" + "numofdeg" + "|" + "index" + "\n");
+			for (int k : dis_neibor.keySet()) {
+				String dis = idNameConverter(index_disease.get(k), idnamemap);
+				int deg = dis_neibor.get(k);
+				bw.write(dis + "|" + deg + "|" + k + "\n");
+			}
+		}
+	}
+
+	/**
+	 * Looks up the disease concept name stored for a UMLS ID.
+	 *
+	 * @param id a UMLS ID
+	 * @param idnamemap a Map from UMLS ID to name
+	 * @return the mapped name, or null when the map has no value for the ID
+	 */
+	private static String idNameConverter(String id, Map<String, String> idnamemap) {
+		String name = idnamemap.get(id);
+		return name;
+	}
+
+	/**
+	 * Fills idnamemap and nameidmap from a '|'-separated map file.
+	 * <p>
+	 * For every line, field 0 is the ID and field 1 the concept name; the name is
+	 * lower-cased before it is stored in either map.
+	 *
+	 * @param mapfile a file containing map information from UMLS to disease concept name
+	 * @throws IOException if the file cannot be opened or read
+	 */
+	public static void createIdNameMap(String mapfile) throws IOException {
+		// try-with-resources: the reader is closed even if a line is malformed
+		try (BufferedReader br = new BufferedReader(new FileReader(mapfile))) {
+			String line;
+			while ((line = br.readLine()) != null) {
+				String[] parts = line.split("\\|");
+				String id = parts[0];
+				String name = parts[1].toLowerCase();
+				idnamemap.put(id, name);
+				nameidmap.put(name, id);
+			}
+		}
+	}
+	
+			
+	/**
+	 * Builds the disease comorbidity network and writes the network and its terms
+	 * to files.
+	 * <p>
+	 * All paths have built-in defaults and may be overridden positionally:
+	 * args[0] rule file, args[1] UMLS id-to-name map file, args[2] network output
+	 * file, args[3] term output file, args[4] UMLS-to-SOC file. Without arguments
+	 * the defaults are used.
+	 *
+	 * @param args optional path overrides, see above
+	 * @throws IOException if an input cannot be read or an output cannot be written
+	 */
+	public static void main(String[] args) throws IOException {
+		// association rule file
+		String rulefile = args.length > 0 ? args[0]
+				: "./data/ID_indications_all_clean_width_umls_id_diso_filtered_sp_lift_1_0.000002_3_processed.txt";
+		// UMLS id to disease name map file
+		String mapfile = args.length > 1 ? args[1] : "./data/umls_id_name_diso";
+		// network file to be saved
+		String netfile = args.length > 2 ? args[2] : "./results/fares_comm_net_conf_final_abbr.txt";
+		// term file to be saved
+		String termfile = args.length > 3 ? args[3] : "./results/fares_comm_net_term_lift_final_public.txt";
+		// UMLS to SOC file; the default is a machine-specific absolute path
+		String socfile = args.length > 4 ? args[4]
+				: "/Users/zhengc/workspace/FARES_final/analysis/Cluster/umls_soc.txt";
+
+		// ------------------ Build and save network to files ----------------
+		readAssocRules(rulefile);
+		readUMLSSOC(socfile);
+
+		DisGraph commnet = createCommNet(rulefile);
+		System.out.println("Nodes: " + commnet.getNodes());
+		System.out.println("Edges: " + commnet.getEdges());
+
+		// the name map is only needed by the writers below
+		createIdNameMap(mapfile);
+
+		writeCommNet(commnet, netfile);
+		writeTerms(termfile);
+	}
+}	
+
+