[61e40d]: / assoc_rules / ProcRules.java

Download this file

90 lines (75 with data), 2.9 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
package assoc_rules;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Converts association rules from Weka FPGrowth output into a pipe-delimited
 * table that is easier to handle in downstream analysis.
 *
 * <p>Usage: {@code ProcRules <rules-file> <output-file> <category>}
 *
 * @author zhengc
 */
public class ProcRules {

    /** Number of lines at the top of the rules file that are read and discarded. */
    private static final int SKIPPED_LINES = 2;

    /**
     * Step 1: symbols that carry no information for the output table, i.e. "=1",
     * lower-case labels ending in ":" (optionally preceded by "<"), ">", ":", "=",
     * parentheses, and the running rule number ("12. ") at the start of a rule.
     */
    private static final Pattern NOISE =
            Pattern.compile("=1|\\<?[a-z]+:|\\>|:|=|\\(|\\)|\\s*\\d+\\. +");

    /**
     * Step 2: a bracketed feature list that contains spaces, either inside a single
     * feature name or after the commas that separate several features.
     */
    private static final Pattern SPACED_FEATURES = Pattern.compile(
            "(\\[\\s+\\w+\\]|\\[([\\w\\-\\s/]+\\s+)+[\\w\\-/]+\\])|(\\[([\\w\\s\\-/]+,\\s)+([\\w\\s\\-/])+\\])");

    /** Step 3: one or more spaces separating two output fields. */
    private static final Pattern FIELD_GAP = Pattern.compile(" +");

    /**
     * Reads the rules file, writes a header line followed by one pipe-delimited
     * line per rule to the output file, and echoes every converted rule to
     * standard output.
     *
     * @param args {@code args[0]} path of the rules file to read,
     *             {@code args[1]} path of the file to write,
     *             {@code args[2]} category name used as prefix of the two item columns
     * @throws IOException if the input cannot be read or the output cannot be written
     * @throws IllegalArgumentException if fewer than three arguments are given
     */
    public static void main(String[] args) throws IOException {
        /* Command line arguments */
        if (args == null || args.length < 3) {
            throw new IllegalArgumentException(
                    "Usage: ProcRules <rules-file> <output-file> <category>");
        }
        String rules = args[0];
        String out = args[1];
        String category = args[2];

        // NOTE(review): "Support_1 " carries a trailing space; it is kept as-is so that
        // the header stays identical for existing consumers -- confirm whether intended.
        String header = category + "_1" + "|" + "Support_1 " + "|" + category + "_2" + "|" + "Support_2" + "|"
                + "Confidence" + "|" + "Lift" + "|" + "Leverage" + "|" + "Conviction" + "\n";

        // try-with-resources closes both files even when reading, parsing or writing fails.
        try (BufferedReader br = new BufferedReader(new FileReader(new File(rules)));
                BufferedWriter bw = new BufferedWriter(new FileWriter(new File(out)))) {
            // Write header line
            bw.write(header);

            // The first lines of the rules file are not parsed as rules.
            for (int i = 0; i < SKIPPED_LINES; i++) {
                br.readLine();
            }

            String line;
            while ((line = br.readLine()) != null) {
                String procline = processLine(line);
                System.out.println(procline);
                bw.write(procline + "\n"); // Write to file
            }
        }
    }

    /** Turns one raw rule line into a single pipe-delimited record. */
    private static String processLine(String line) {
        // Step 1: Remove meaningless symbols including "=1", "<>", and the count
        // number at the beginning of the rule.
        String cleaned = NOISE.matcher(line).replaceAll("");

        // Step 2: Distinguish spaces inside features from spaces between fields by
        // replacing the former with "_".
        // StringBuffer is used because Matcher.appendReplacement only accepts a
        // StringBuilder from Java 9 on.
        Matcher features = SPACED_FEATURES.matcher(cleaned);
        StringBuffer marked = new StringBuffer();
        while (features.find()) {
            String protectedText = features.group().replace(" ", "_");
            // quoteReplacement keeps the matched text literal ("$" and "\" are special
            // in replacement strings).
            features.appendReplacement(marked, Matcher.quoteReplacement(protectedText));
        }
        features.appendTail(marked);

        // Step 3: Replace the spaces between fields with "|".
        String joined = String.join("|", FIELD_GAP.split(marked.toString()));

        // Step 4: Change the spaces inside features back.
        // NOTE(review): this also turns underscores that were already part of the
        // input into spaces -- confirm that feature names never contain "_".
        return joined.replace("_", " ");
    }
}