a b/lvl2/genEns.py
1
# -*- coding: utf-8 -*-
2
"""
Created on Sat Aug 15 14:12:12 2015

@author: rc, alex
"""
7
import os
8
import sys
9
if __name__ == '__main__' and __package__ is None:
10
    filePath = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
11
    sys.path.append(filePath)
12
13
import numpy as np
14
import yaml
15
from copy import deepcopy
16
from collections import OrderedDict
17
from sklearn.metrics import roc_auc_score
18
from sklearn.cross_validation import LeaveOneLabelOut
19
20
from preprocessing.aux import getEventNames
21
from utils.ensembles import createEnsFunc, loadPredictions, getLvl1ModelList
22
23
from ensembling.WeightedMean import WeightedMeanClassifier
24
from ensembling.NeuralNet import NeuralNet
25
from ensembling.XGB import XGB
26
27
28
def _from_yaml_to_func(method, params):
    """Instantiate the object described by one YAML model entry.

    This helper has to live in this module: both the method name and the
    parameter values are resolved with ``eval`` against the names that are
    visible here (for example the imported ensembling classes).

    Parameters
    ----------
    method : str
        Expression naming a callable that is in scope.
    params : dict or None
        Keyword arguments for ``method``. Every value is converted with
        ``str`` and then evaluated, so YAML strings may contain Python
        expressions. ``None`` means "call without arguments".

    Returns
    -------
    object
        The result of ``method(**evaluated_params)``.
    """
    # NOTE(review): eval executes arbitrary code taken from the YAML file;
    # only run this with trusted configuration files.
    prm = dict()
    if params is not None:
        # items() (not the Python-2-only iteritems()) works on 2 and 3.
        # Kept as an explicit loop so eval sees the same scope as before.
        for key, val in params.items():
            prm[key] = eval(str(val))
    return eval(method)(**prm)
38
39
# ## here read YAML and build models ###
# Command line: <config.yml> <val|test>
if len(sys.argv) < 3:
    # Fail with a usage hint instead of a bare IndexError further down.
    sys.exit('Usage: %s <config.yml> <val|test>' % sys.argv[0])

# Validate the mode first so a typo fails before any model is built.
mode = sys.argv[2]
if mode not in ('val', 'test'):
    # Raising a plain string is itself a TypeError; use a real exception.
    raise ValueError('Invalid mode. Please specify either val or test')
# True: fit on all validation data and predict the test set.
# False: cross-validate on the validation data.
test = mode == 'test'

# NOTE(review): yaml.load without an explicit Loader can construct arbitrary
# objects and is rejected by recent PyYAML releases; consider
# yaml.safe_load if the configs only hold plain data.
with open(sys.argv[1]) as config_file:
    yml = yaml.load(config_file)

# Name used for the saved prediction files.
fileName = yml['Meta']['file']
# Step used to thin out training rows (1 keeps every row).
subsample = yml['Meta'].get('subsample', 1)

# Only the first entry of the 'Model' section is used.
# next(iter(...items())) replaces the Python-2-only .iteritems().next().
modelName, modelParams = next(iter(yml['Model'].items()))
model_base = _from_yaml_to_func(modelName, modelParams)

# Names of the level-1 models to be combined.
ensemble = yml['Model'][modelName]['ensemble']
# The flag is looked up at the top level of the config (not under 'Meta');
# only the presence of the key matters, its value is ignored.
addSubjectID = 'addSubjectID' in yml

print('Running %s in mode %s, predictions will be saved as %s' % (modelName,mode,fileName))
63
64
######
cols = getEventNames()

# Test-set info array: column 0 is read as ids, 1 as subjects, 2 as series.
infos_test = np.load('../infos_test.npy')
ids = infos_test[:, 0]
subjects_test = infos_test[:, 1]
series_test = infos_test[:, 2]

# Validation info array: the last two columns are read as subjects and
# series, everything before them as the labels.
infos_val = np.load('../infos_val.npy')
labels = infos_val[:, :-2]
subjects = infos_val[:, -2]
series = infos_val[:, -1]

allCols = range(len(cols))

# ## loading predictions ###
files = getLvl1ModelList()

preds_val = OrderedDict()
for f in files:
    loadPredictions(preds_val, f[0], f[1])

# validity check: every requested ensemble member must have been loaded.
# An explicit raise (instead of assert) survives `python -O` and names the
# offending models.
missing = [m for m in ensemble if m not in preds_val]
if missing:
    raise ValueError('Ensemble models without loaded predictions: %s'
                     % ', '.join(str(m) for m in missing))

# ## train/test ###
aggr = createEnsFunc(ensemble)
dataTrain = aggr(preds_val)
# Drop the reference so the raw predictions can be garbage collected.
preds_val = None

# optionally adding subjectIDs as an extra feature column
if addSubjectID:
    dataTrain = np.c_[dataTrain, subjects]

# Fixed seed for reproducible runs.
np.random.seed(4234521)
98
99
if test:
    # Test mode: fit once on the (subsampled) validation-level data and
    # predict the test set.
    model = deepcopy(model_base)
    model.fit(dataTrain[::subsample], labels[::subsample])
    # Drop the reference to the training matrix before loading test data.
    dataTrain = None

    # load test data
    preds_test = OrderedDict()
    for f in files:
        loadPredictions(preds_test, f[0], f[1], test=True)
    dataTest = aggr(preds_test)
    preds_test = None
    # switch to add subjects
    if addSubjectID:
        dataTest = np.c_[dataTest, subjects_test]

    # get predictions
    p = model.predict_proba(dataTest)
    np.save('test/test_%s.npy' % fileName, [p])
else:
    # Validation mode: leave-one-series-out cross-validation; out-of-fold
    # predictions are collected in p and saved at the end.
    auc_tot = []
    p = np.zeros(labels.shape)
    cv = LeaveOneLabelOut(series)
    # The fold indices get their own names so they do not overwrite the
    # module-level `test` mode flag.
    for fold, (train_idx, test_idx) in enumerate(cv):
        model = deepcopy(model_base)
        if modelName == 'NeuralNet':
            # passing also test data to print out test error during training
            # (no subsampling is applied on this path)
            model.fit(dataTrain[train_idx], labels[train_idx],
                      dataTrain[test_idx], labels[test_idx])
        else:
            model.fit(dataTrain[train_idx][::subsample],
                      labels[train_idx][::subsample])
        p[test_idx] = model.predict_proba(dataTrain[test_idx])
        # Fold score: mean of the per-column AUCs.
        auc = [roc_auc_score(labels[test_idx][:, col], p[test_idx][:, col])
               for col in allCols]
        auc_tot.append(np.mean(auc))
        print('Fold %d, score: %.5f' % (fold, auc_tot[-1]))
    print('AUC: %.5f' % np.mean(auc_tot))
    np.save('val/val_%s.npy' % fileName, [p])