Switch to unified view

a b/assoc_rules/ProcRules.java
1
package assoc_rules;
2
import java.io.BufferedReader;
3
import java.io.BufferedWriter;
4
import java.io.File;
5
import java.io.FileReader;
6
import java.io.FileWriter;
7
import java.io.IOException;
8
import java.util.regex.Matcher;
9
import java.util.regex.Pattern;
10
11
/**
 * Command-line tool that post-processes association rules mined with Weka's
 * FPGrowth into a pipe-delimited table that is easy to load for downstream
 * analysis.
 *
 * <p>Usage: {@code ProcRules <rules-file> <output-file> <category>}
 *
 * @author zhengc
 */
public class ProcRules {

    /**
     * Step 1 pattern: tokens stripped from every rule line. It removes
     * {@code =1}, an optional {@code <} followed by a lower-case word and a
     * colon (e.g. {@code <conf:}, {@code lift:}), the characters
     * {@code > : = ( )}, and a rule counter such as {@code " 12. "}.
     */
    private static final Pattern NOISE =
            Pattern.compile("=1|\\<?[a-z]+:|\\>|:|=|\\(|\\)|\\s*\\d+\\. +");

    /**
     * Step 2 pattern: a bracketed item set that contains whitespace, either
     * inside a single feature name or after the commas of a feature list.
     */
    private static final Pattern ITEM_SET = Pattern.compile(
            "(\\[\\s+\\w+\\]|\\[([\\w\\-\\s/]+\\s+)+[\\w\\-/]+\\])|(\\[([\\w\\s\\-/]+,\\s)+([\\w\\s\\-/])+\\])");

    /** Step 3 pattern: one or more spaces separating two output columns. */
    private static final Pattern COLUMN_GAP = Pattern.compile(" +");

    /**
     * Temporary stand-in for spaces that belong to an item set. A private-use
     * code point is used instead of {@code '_'} so that underscores already
     * present in feature names survive the round trip unchanged.
     */
    private static final char SPACE_PLACEHOLDER = '\uE000';

    /**
     * Reads the rules file, writes a header followed by one pipe-delimited
     * row per rule line to the output file, and echoes each row to standard
     * output.
     *
     * @param args {@code args[0]} path of the rules file to read,
     *             {@code args[1]} path of the file to write,
     *             {@code args[2]} category name used as prefix of the two
     *             item-set column titles
     * @throws IOException if the input cannot be read or the output cannot
     *                     be written
     * @throws IllegalArgumentException if fewer than three arguments are given
     */
    public static void main(String[] args) throws IOException {
        if (args == null || args.length < 3) {
            throw new IllegalArgumentException(
                    "Usage: ProcRules <rules-file> <output-file> <category>");
        }

        /* Command line arguments */
        String rules = args[0];
        String out = args[1];
        String category = args[2];

        // NOTE(review): "Support_1 " carries a trailing space. It is kept
        // byte-for-byte so existing consumers of the header are unaffected;
        // confirm whether it is intentional.
        String header = category + "_1" + "|" + "Support_1 " + "|" + category + "_2" + "|" + "Support_2" + "|"
                + "Confidence" + "|" + "Lift" + "|" + "Leverage" + "|" + "Conviction" + "\n";

        // try-with-resources closes (and flushes) both files even when an
        // exception interrupts processing. The reader is opened first so a
        // missing input file fails before the output file is created.
        try (BufferedReader br = new BufferedReader(new FileReader(new File(rules)));
             BufferedWriter bw = new BufferedWriter(new FileWriter(new File(out)))) {

            bw.write(header);

            // The first two lines of the input are discarded, presumably the
            // preamble of the Weka output (TODO confirm against a sample file).
            br.readLine();
            br.readLine();

            String line;
            while ((line = br.readLine()) != null) {
                String procline = processLine(line);
                System.out.println(procline);
                bw.write(procline + "\n"); // Write to file
            }
        }
    }

    /**
     * Converts one raw rule line into a pipe-delimited row.
     *
     * @param line raw line from the rules file
     * @return the cleaned line with columns separated by {@code |}
     */
    private static String processLine(String line) {
        // Step 1: remove meaningless symbols and the leading rule counter.
        String cleaned = NOISE.matcher(line).replaceAll("");

        // Step 2: shield spaces inside item sets so that they are not
        // mistaken for column separators in step 3.
        Matcher items = ITEM_SET.matcher(cleaned);
        StringBuffer shielded = new StringBuffer();
        while (items.find()) {
            String item = items.group().replace(' ', SPACE_PLACEHOLDER);
            // quoteReplacement keeps '$' and '\' from being read as group references.
            items.appendReplacement(shielded, Matcher.quoteReplacement(item));
        }
        items.appendTail(shielded);

        // Step 3: every remaining run of spaces separates two columns.
        String row = String.join("|", COLUMN_GAP.split(shielded.toString()));

        // Step 4: restore the spaces that belong to the item sets.
        return row.replace(SPACE_PLACEHOLDER, ' ');
    }
}