Diff of /dataprep/WidthToArff.java [000000] .. [4fba4e]

Switch to unified view

a b/dataprep/WidthToArff.java
1
package dataprep;
2
import java.io.BufferedReader;
3
import java.io.BufferedWriter;
4
import java.io.File;
5
import java.io.FileNotFoundException;
6
import java.io.FileReader;
7
import java.io.FileWriter;
8
import java.io.IOException;
9
import java.util.ArrayList;
10
import java.util.Collections;
11
import java.util.HashMap;
12
import java.util.HashSet;
13
import java.util.List;
14
import java.util.Map;
15
import java.util.Set;
16
17
/**
 * Provides methods for converting cleaned FAERS data into an arff file.
 *
 * <p>The input ("width") file is read line by line. Its first line is skipped
 * (presumably a header row -- confirm against the data) and every following line
 * is split on commas. The distinct values found in the whole file become the
 * attributes of the relation, sorted by their natural {@link String} order, each
 * declared with the nominal domain <code>{0,1}</code>. Every input line is then
 * written as one sparse data row of the form <code>{i 1,j 1,...}</code>, listing
 * the positions of the values present on that line.
 *
 * <p>Usage: <code>java dataprep.WidthToArff &lt;widthfile&gt; &lt;arfffile&gt;</code>.
 * When fewer than two arguments are given the built-in default paths are used.
 *
 * @author zhengc
 */
public class WidthToArff {

    /** Input file used when no paths are passed on the command line. */
    private static final String DEFAULT_WIDTH_FILE =
            "/Users/zhengc/workspace/FARES/data/FARES/UMLS_map_data/ID_indications_all_clean_width_umls_id_diso_AD_filtered";

    /** Output file used when no paths are passed on the command line. */
    private static final String DEFAULT_ARFF_FILE =
            "/Users/zhengc/workspace/FARES/data/FARES/UMLS_map_data/ID_indications_all_clean_width_umls_id_diso_AD_filtered_sp.arff";

    /**
     * Converts a width file into a sparse arff file.
     *
     * @param args optional; {@code args[0]} is the input width file and
     *             {@code args[1]} the arff file to write. With fewer than two
     *             arguments the default paths are used.
     * @throws IOException if the input cannot be read or the output cannot be written
     */
    public static void main(String[] args) throws IOException {
        String widthfile = DEFAULT_WIDTH_FILE;
        String arfffile = DEFAULT_ARFF_FILE;
        if (args != null && args.length >= 2) {
            widthfile = args[0];
            arfffile = args[1];
        }
        convert(widthfile, arfffile);
    }

    /**
     * Writes the arff header and the sparse data rows for {@code widthfile}
     * into {@code arfffile}.
     *
     * @param widthfile path of the comma-separated input file
     * @param arfffile  path of the arff file to create or overwrite
     * @throws IOException if reading or writing fails
     */
    private static void convert(String widthfile, String arfffile) throws IOException {
        // try-with-resources: both streams are closed even when an exception is
        // thrown half-way through (writer first, so buffered output is flushed).
        try (BufferedReader br = new BufferedReader(new FileReader(new File(widthfile)));
             BufferedWriter bw = new BufferedWriter(new FileWriter(new File(arfffile)))) {

            // write header info; the relation name is the fixed text "widthfile"
            bw.write("@relation " + "widthfile" + "\n");
            bw.write("\n");

            Set<String> itemset = getItemSet(widthfile);
            System.out.println(itemset.size());
            List<String> itemlist = new ArrayList<>(itemset);
            Collections.sort(itemlist);

            // Remember each attribute's position once, so that rows can be
            // translated with a hash lookup instead of a linear list search.
            Map<String, Integer> positions = new HashMap<>();
            for (int i = 0; i < itemlist.size(); i++) {
                String item = itemlist.get(i);
                positions.put(item, i);
                bw.write("@attribute " + "'" + escapeQuoted(item) + "'" + " {0,1}" + "\n");
            }

            // write data
            bw.write("\n");
            bw.write("@data" + "\n");

            br.readLine(); // skip the first line, exactly as getItemSet does
            String line;
            while ((line = br.readLine()) != null) {
                bw.write(toSparseRow(line, positions) + "\n");
            }
        }
    }

    /**
     * Builds one sparse arff row, e.g. <code>{0 1,4 1}</code>, for an input line.
     *
     * <p>Duplicate values on the line are collapsed and the values are sorted
     * the same way as the attribute list, so the emitted positions are in
     * ascending order.
     *
     * @param line      one comma-separated input line
     * @param positions attribute position of every known value
     * @return the sparse row, without a trailing line break
     */
    private static String toSparseRow(String line, Map<String, Integer> positions) {
        // get unique indications
        Set<String> present = new HashSet<>();
        Collections.addAll(present, line.split(","));

        // convert to list and sort the list
        List<String> ordered = new ArrayList<>(present);
        Collections.sort(ordered);

        StringBuilder row = new StringBuilder("{");
        for (int i = 0; i < ordered.size(); i++) {
            Integer index = positions.get(ordered.get(i));
            if (index == null) {
                // Only possible if the input changed between the two passes.
                throw new IllegalStateException("No attribute declared for value: " + ordered.get(i));
            }
            if (i > 0) {
                row.append(',');
            }
            row.append(index).append(" 1");
        }
        return row.append('}').toString();
    }

    /**
     * Escapes backslashes and single quotes so that {@code name} can be placed
     * between single quotes in an arff header line.
     *
     * @param name raw attribute name
     * @return the name with {@code \} and {@code '} preceded by a backslash
     */
    private static String escapeQuoted(String name) {
        return name.replace("\\", "\\\\").replace("'", "\\'");
    }

    /**
     * Maps every distinct comma-separated value in the file to the order in
     * which it was first seen (0-based). Unlike {@link #getItemSet(String)}
     * this reads every line, including the first one. Not referenced by the
     * rest of this class.
     *
     * @param filename path of the comma-separated input file
     * @return value to first-occurrence index
     * @throws IOException if the file cannot be read
     */
    private static Map<String, Integer> getItemIndex(String filename) throws IOException {
        Map<String, Integer> itemindex = new HashMap<>();
        try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) {
            String line;
            int m = 0;
            while ((line = br.readLine()) != null) {
                for (String part : line.split(",")) {
                    if (!itemindex.containsKey(part)) {
                        itemindex.put(part, m);
                        m++;
                    }
                }
            }
        }
        return itemindex;
    }

    /**
     * Collects the distinct comma-separated values of the file, ignoring its
     * first line.
     *
     * @param filename path of the comma-separated input file
     * @return all distinct values found after the first line
     * @throws IOException if the file cannot be read
     */
    private static Set<String> getItemSet(String filename) throws IOException {
        Set<String> itemset = new HashSet<>();
        try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) {
            br.readLine(); // skip the first line
            String line;
            while ((line = br.readLine()) != null) {
                Collections.addAll(itemset, line.split(","));
            }
        }
        return itemset;
    }
}
116