/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package utils;

import biodiscml.BestModelSelectionAndReport;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Writes CSV files, named {@code *.UpSetR.csv}, that pair the MCC metrics of
 * each row of a predictions results file with a 0/1 feature-presence matrix.
 *
 * @author mickael
 */
public class UpSetR {

    /** Leading columns shared by both generated files (no trailing comma). */
    private static final String METRICS_HEADER = "ID,"
            + "TRAIN_10CV_MCC"
            + ",TRAIN_LOOCV_MCC"
            + ",TRAIN_BS_MCC"
            + ",TEST_MCC"
            + ",TRAIN_TEST_BS_MCC"
            + ",AVG_MCC";

    /**
     * Creates the UpSetR dataset with one 0/1 column per ARFF attribute
     * (first and last attributes excluded).
     *
     * <p>The ARFF file is looked up by replacing {@code .csv} with
     * {@code .arff} in {@code featureSelectionFile}. The first line of
     * {@code predictionsResultsFile} is skipped as a header; every other
     * non-blank line is parsed into a {@code classificationObject}. Errors
     * are reported on the standard error stream and do not propagate.
     *
     * @param featureSelectionFile path whose {@code .arff} sibling lists the attributes
     * @param predictionsResultsFile path of the predictions results file to convert
     */
    public void creatUpSetRDataset(String featureSelectionFile, String predictionsResultsFile) {
        System.out.println("# create UpSetR file");
        String outfile = upSetROutputPath(predictionsResultsFile);

        //create header
        List<String> featuresHeader = readArffAttributeNames(featureSelectionFile.replace(".csv", ".arff"));
        if (featuresHeader.size() < 2) {
            // Nothing sensible can be produced without the attribute list.
            System.err.println("[error] UpSetR: fewer than two attributes read from "
                    + featureSelectionFile.replace(".csv", ".arff") + ", no file created");
            return;
        }
        // Drop the last and the first attribute; feature index i then maps to column i - 2.
        featuresHeader.remove(featuresHeader.size() - 1);
        featuresHeader.remove(0);

        //output
        try (BufferedReader br = new BufferedReader(new FileReader(predictionsResultsFile));
                PrintWriter pw = new PrintWriter(new FileWriter(outfile))) {
            // Header format kept as-is: attribute names separated by ", ".
            pw.println(METRICS_HEADER + ","
                    + featuresHeader.toString().replace("[", "").replace("]", "").trim());
            pw.flush();

            br.readLine(); // skip the header line of the predictions file
            String line;
            while ((line = br.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue;
                }
                BestModelSelectionAndReport.classificationObject co
                        = new BestModelSelectionAndReport.classificationObject(line);

                int[] tab = new int[featuresHeader.size()];
                for (String index : innerFeatures(co.featureList)) {
                    try {
                        tab[Integer.parseInt(index) - 2] = 1;
                    } catch (NumberFormatException | ArrayIndexOutOfBoundsException e) {
                        // A bad index must not prevent the other features from being marked.
                        System.err.println("[warning] UpSetR: ignoring feature index '" + index
                                + "' for " + co.identifier);
                        e.printStackTrace();
                    }
                }

                StringBuilder row = new StringBuilder(metricColumns(co));
                for (int flag : tab) {
                    row.append(',').append(flag);
                }
                pw.println(row.toString());
            }
            if (pw.checkError()) {
                System.err.println("[error] UpSetR: problem while writing " + outfile);
            }
        } catch (Exception e) {
            // Best effort, as before: parsing a line may throw unchecked exceptions.
            e.printStackTrace();
        }
        System.out.println("UpSetR file: " + outfile);
    }

    /**
     * Creates the UpSetR dataset restricted to the features of one model:
     * one 0/1 column per feature of {@code co_model}, set to 1 when the row's
     * own feature list contains the same feature index.
     *
     * <p>File handling and error reporting follow
     * {@link #creatUpSetRDataset(String, String)}. Only the last ARFF
     * attribute is dropped here, so feature index i maps to attribute i - 1.
     *
     * @param co_model model whose feature list defines the columns
     * @param featureSelectionFile path whose {@code .arff} sibling lists the attributes
     * @param predictionsResultsFile path of the predictions results file to convert
     */
    public void creatUpSetRDatasetFromSignature(BestModelSelectionAndReport.classificationObject co_model,
            String featureSelectionFile, String predictionsResultsFile) {
        System.out.println("# create UpSetR file");
        String outfile = upSetROutputPath(predictionsResultsFile);

        //create header
        List<String> alFeaturesOrder = readArffAttributeNames(featureSelectionFile.replace(".csv", ".arff"));
        if (alFeaturesOrder.isEmpty()) {
            System.err.println("[error] UpSetR: no attribute read from "
                    + featureSelectionFile.replace(".csv", ".arff") + ", no file created");
            return;
        }
        alFeaturesOrder.remove(alFeaturesOrder.size() - 1);

        //retrieve signature (computed once, reused for every row)
        List<String> signature = innerFeatures(co_model.featureList);
        StringBuilder featuresHeader = new StringBuilder();
        for (String index : signature) {
            featuresHeader.append(',').append(alFeaturesOrder.get(Integer.parseInt(index) - 1));
        }

        //output
        try (BufferedReader br = new BufferedReader(new FileReader(predictionsResultsFile));
                PrintWriter pw = new PrintWriter(new FileWriter(outfile))) {
            //header
            pw.println(METRICS_HEADER + featuresHeader);
            pw.flush();

            br.readLine(); // skip the header line of the predictions file

            //content
            String line;
            while ((line = br.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue;
                }
                BestModelSelectionAndReport.classificationObject co_line
                        = new BestModelSelectionAndReport.classificationObject(line);

                Set<String> lineFeatures = new HashSet<>(innerFeatures(co_line.featureList));
                StringBuilder row = new StringBuilder(metricColumns(co_line));
                for (String index : signature) {
                    row.append(lineFeatures.contains(index) ? ",1" : ",0");
                }
                pw.println(row.toString());
            }
            if (pw.checkError()) {
                System.err.println("[error] UpSetR: problem while writing " + outfile);
            }
        } catch (Exception e) {
            // Best effort, as before: parsing a line may throw unchecked exceptions.
            e.printStackTrace();
        }
        System.out.println("UpSetR file: " + outfile);
    }

    /**
     * Derives the output path by replacing {@code .csv} with
     * {@code .UpSetR.csv}. When the input contains no {@code .csv}, the
     * suffix is appended instead, so the input file is never overwritten.
     */
    private static String upSetROutputPath(String predictionsResultsFile) {
        String outfile = predictionsResultsFile.replace(".csv", ".UpSetR.csv");
        if (outfile.equals(predictionsResultsFile)) {
            outfile = predictionsResultsFile + ".UpSetR.csv";
        }
        return outfile;
    }

    /**
     * Reads the consecutive {@code @attribute} lines that start on the third
     * line of the given ARFF file and returns each with the
     * {@code "@attribute "} prefix and the trailing {@code " word"} removed.
     * On an I/O error the stack trace is printed and the names read so far
     * (possibly none) are returned.
     */
    private static List<String> readArffAttributeNames(String arffFile) {
        List<String> names = new ArrayList<>();
        try (BufferedReader br = new BufferedReader(new FileReader(arffFile))) {
            br.readLine(); //relation
            br.readLine(); //empty line
            String line = br.readLine(); // @attribute
            // The null check covers a file that ends inside the attribute section.
            while (line != null && line.startsWith("@attribute")) {
                names.add(line.replace("@attribute ", "").replaceAll(" \\w+$", ""));
                line = br.readLine();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return names;
    }

    /**
     * Returns a copy of the list without its first and last entries (empty
     * when the list has fewer than three entries). The source list is left
     * untouched.
     */
    private static List<String> innerFeatures(List<String> featureList) {
        List<String> inner = new ArrayList<>();
        for (int i = 1; i < featureList.size() - 1; i++) {
            inner.add(featureList.get(i));
        }
        return inner;
    }

    /** Builds the identifier and the six MCC columns of an output row. */
    private static String metricColumns(BestModelSelectionAndReport.classificationObject co) {
        return "" + co.identifier
                + "," + co.hmValues.get("TRAIN_10CV_MCC")
                + "," + co.hmValues.get("TRAIN_LOOCV_MCC")
                + "," + co.hmValues.get("TRAIN_BS_MCC")
                + "," + co.hmValues.get("TEST_MCC")
                + "," + co.hmValues.get("TRAIN_TEST_BS_MCC")
                + "," + co.hmValues.get("AVG_MCC");
    }

}