|
a |
|
b/dataprep/LongToWide.java |
|
|
1 |
package dataprep; |
|
|
2 |
|
|
|
3 |
import java.io.BufferedReader; |
|
|
4 |
import java.io.BufferedWriter; |
|
|
5 |
import java.io.File; |
|
|
6 |
import java.io.FileNotFoundException; |
|
|
7 |
import java.io.FileReader; |
|
|
8 |
import java.io.FileWriter; |
|
|
9 |
import java.io.IOException; |
|
|
10 |
import java.text.Normalizer; |
|
|
11 |
import java.util.ArrayList; |
|
|
12 |
import java.util.HashMap; |
|
|
13 |
import java.util.HashSet; |
|
|
14 |
import java.util.List; |
|
|
15 |
import java.util.Map; |
|
|
16 |
import java.util.Set; |
|
|
17 |
|
|
|
18 |
/**
 * Converts two "long" CSV files (one row per patient/concept pair) into a single
 * "wide" CSV in which each output line holds the comma-separated concept ids of
 * one patient.
 *
 * <p>Only patients that appear in both inputs are written. The patient id itself
 * is not written to the output, only the concept ids.
 *
 * <p>An ADNI3 medical-history file ({@code ADNI3_umls_id_dsyn_2.csv}) was once
 * wired in as an additional input; it is currently not read.
 */
public class LongToWide {

    /** Medical-history input used when no first argument is supplied. */
    private static final String DEFAULT_MEDICAL_HISTORY_FILE =
            "/Users/zhengc/Projects/AD_metaformin/data/medical_history/processed/ADNI1_umls_id_dsyn_2.csv";

    /** Drug input used when no second argument is supplied. */
    private static final String DEFAULT_DRUG_FILE =
            "/Users/zhengc/Projects/AD_metaformin/data/drug/processed/drug_umls_id.csv";

    /** Output file used when no third argument is supplied. */
    private static final String DEFAULT_OUTPUT_FILE =
            "/Users/zhengc/Projects/AD_metaformin/data/drug/processed/Comb_drug_mh_umls_id_width.csv";

    /**
     * Concept id added to a patient whose status column is "AD" or "MCI".
     * NOTE(review): presumably the UMLS concept for Alzheimer's disease - confirm.
     */
    private static final String AD_CONCEPT_ID = "C0002395";

    /**
     * Reads both inputs, merges the concept ids of every patient present in both,
     * and writes one comma-separated line per patient.
     *
     * @param args optional positional overrides: {@code args[0]} medical-history CSV,
     *             {@code args[1]} drug CSV, {@code args[2]} output CSV. Any argument
     *             that is missing falls back to the built-in default path.
     * @throws IOException if an input cannot be read or the output cannot be written
     */
    public static void main(String[] args) throws IOException {
        String lfname1 = argOrDefault(args, 0, DEFAULT_MEDICAL_HISTORY_FILE);
        String lfname3 = argOrDefault(args, 1, DEFAULT_DRUG_FILE);
        String wfname = argOrDefault(args, 2, DEFAULT_OUTPUT_FILE);

        // Both inputs are fully read before the output file is opened, so a missing
        // input never truncates an existing output file.
        Map<Integer, Set<String>> pid_cids1 = getCids(lfname1);
        Map<Integer, Set<String>> pid_cids3 = getCids(lfname3);

        // Keep only patients found in both inputs. Iterating and filtering (instead of
        // calling retainAll on the keySet view) avoids deleting entries from pid_cids1.
        Map<Integer, Set<String>> pid_cids = new HashMap<>();
        for (Map.Entry<Integer, Set<String>> entry : pid_cids1.entrySet()) {
            Set<String> cids3 = pid_cids3.get(entry.getKey());
            if (cids3 == null) {
                continue;
            }
            Set<String> cids = entry.getValue();
            cids.addAll(cids3);
            pid_cids.put(entry.getKey(), cids);
        }

        // try-with-resources guarantees the writer is flushed and closed even when a
        // write fails part-way through.
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(wfname)))) {
            for (Set<String> cids : pid_cids.values()) {
                bw.write(joinWithCommas(cids) + "\n");
            }
        }
    }

    /**
     * Loads a long-format CSV into a map from patient id to the set of concept ids.
     *
     * <p>The first line is treated as a header and discarded; blank lines are skipped.
     * For every other row, column 0 is parsed as the integer patient id and column 2
     * is taken as the concept id. If a fourth column exists and equals "AD" or "MCI",
     * {@link #AD_CONCEPT_ID} is added to that patient's set as well.
     *
     * <p>For each parsed row the column count, patient id, and concept id are printed
     * to standard output.
     *
     * @param lfname path of the CSV file to read
     * @return map from patient id to the concept ids collected for that patient
     * @throws IOException if the file cannot be opened or read
     * @throws NumberFormatException if column 0 of a non-blank row is not an integer
     * @throws ArrayIndexOutOfBoundsException if a non-blank row has fewer than three columns
     */
    private static Map<Integer, Set<String>> getCids(String lfname) throws IOException {
        Map<Integer, Set<String>> pid_cids = new HashMap<>();
        try (BufferedReader br = new BufferedReader(new FileReader(new File(lfname)))) {
            br.readLine(); // discard the header row
            String line;
            while ((line = br.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue; // tolerate blank lines instead of failing in parseInt
                }
                String[] parts = line.split(",");
                System.out.println(parts.length);
                int pid = Integer.parseInt(parts[0]);
                System.out.println(pid);
                String cid = parts[2];
                System.out.println(cid);
                String ad_status = parts.length > 3 ? parts[3] : null;

                Set<String> cids = pid_cids.get(pid);
                if (cids == null) {
                    cids = new HashSet<>();
                    pid_cids.put(pid, cids);
                }
                cids.add(cid);
                if ("AD".equals(ad_status) || "MCI".equals(ad_status)) {
                    cids.add(AD_CONCEPT_ID);
                }
            }
        }
        return pid_cids;
    }

    /** Returns {@code args[index]} when that argument was supplied, otherwise {@code fallback}. */
    private static String argOrDefault(String[] args, int index, String fallback) {
        return (args != null && args.length > index) ? args[index] : fallback;
    }

    /** Joins the values with "," in iteration order, with no leading or trailing separator. */
    private static String joinWithCommas(Set<String> values) {
        StringBuilder sb = new StringBuilder();
        boolean first = true;
        for (String value : values) {
            if (!first) {
                sb.append(',');
            }
            sb.append(value);
            first = false;
        }
        return sb.toString();
    }
}
|
|
95 |
|
|
|
96 |
|
|
|
97 |
|
|
|
98 |
|