Switch to unified view

a b/lvl2/genEns_BagsSubjects.py
1
# -*- coding: utf-8 -*-
2
"""
3
Created on Sat Aug 15 14:12:12 2015
4
5
@author: rc, alex
6
"""
7
import os
8
import sys
9
if __name__ == '__main__' and __package__ is None:
10
    filePath = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
11
    sys.path.append(filePath)
12
13
import numpy as np
14
import yaml
15
from copy import deepcopy
16
from collections import OrderedDict
17
from sklearn.metrics import roc_auc_score
18
from sklearn.cross_validation import LeaveOneLabelOut
19
20
from preprocessing.aux import getEventNames, delay_preds
21
from utils.ensembles import createEnsFunc, loadPredictions, getLvl1ModelList
22
23
from ensembling.WeightedMean import WeightedMeanClassifier
24
from ensembling.NeuralNet import NeuralNet
25
from ensembling.XGB import XGB
26
27
def _from_yaml_to_func(method, params):
28
    """go from yaml to method.
29
30
    Need to be here for accesing local variables.
31
    """
32
    prm = dict()
33
    if params is not None:
34
        for key, val in params.iteritems():
35
            prm[key] = eval(str(val))
36
    return eval(method)(**prm)
37
38
# ## here read YAML and build models ###
# Command line: argv[1] = YAML configuration file, argv[2] = 'val' or 'test'.
# SECURITY: the configuration is parsed with PyYAML's full (unsafe) loader
# and its model parameters are later passed to eval(); only run trusted files.
with open(sys.argv[1]) as yml_file:
    # The loader is named explicitly: recent PyYAML requires the argument.
    yml = yaml.load(yml_file, Loader=yaml.Loader)

fileName = yml['Meta']['file']
# NOTE(review): `subsample` is read here but not used in the visible script.
subsample = yml['Meta'].get('subsample', 1)

nbags = yml['Meta']['nbags']
bagsize = yml['Meta']['bagsize']

# The first entry of the 'Model' section gives the model name and its
# parameters.  next(iter(...)) works on both Python 2 and Python 3.
modelName, modelParams = next(iter(yml['Model'].items()))
model_base = _from_yaml_to_func(modelName, modelParams)

# 'ensemble' is read from the same parameter mapping given to the model.
ensemble = yml['Model'][modelName]['ensemble']
# Only the presence of the top-level key matters; its value is ignored.
addSubjectID = 'addSubjectID' in yml

mode = sys.argv[2]
if mode == 'val':
    test = False
elif mode == 'test':
    test = True
else:
    # Raising a plain string is itself a TypeError; raise a real exception.
    raise ValueError('Invalid mode. Please specify either val or test')

print('Running %s in mode %s, will be saved in %s' % (modelName, mode, fileName))
65
66
######
cols = getEventNames()

# Test info array: column 0 is kept as `ids`, columns 1 and 2 as the
# per-row subject and series.
ids = np.load('../infos_test.npy')
subjects_test = ids[:, 1]
series_test = ids[:, 2]
ids = ids[:, 0]
# Validation info array: the last two columns are taken as subject and
# series, everything before them as the labels.
labels = np.load('../infos_val.npy')
subjects = labels[:, -2]
series = labels[:, -1]
labels = labels[:, :-2]

allCols = range(len(cols))

# ## loading prediction ###
files = getLvl1ModelList()

preds_val = OrderedDict()
for f in files:
    loadPredictions(preds_val, f[0], f[1])
# validity check: every model named in the ensemble must have been loaded.
# An explicit raise is used because assert statements vanish under -O.
missing = [m for m in ensemble if m not in preds_val]
if missing:
    raise ValueError('No predictions loaded for ensemble member(s): %s'
                     % ', '.join(str(m) for m in missing))

# ## train/test ###
aggr = createEnsFunc(ensemble)
dataTrain = aggr(preds_val)
# drop the reference to the raw predictions once they are aggregated
preds_val = None

# switch to add subjects
if addSubjectID:
    # append the subject column as one extra feature
    dataTrain = np.c_[dataTrain, subjects]

# fixed seed so the random subject bags drawn below are reproducible
np.random.seed(4234521)
100
101
def _draw_subject_bag(subject_ids, n_in_bag, n_subjects=12):
    """Return a boolean mask selecting the rows of a random bag of subjects.

    Subject numbers 1..n_subjects are shuffled with the global NumPy random
    state and the first `n_in_bag` of them form the bag.  The mask is True
    wherever `subject_ids` equals one of the subjects in the bag.
    """
    pool = np.arange(1, n_subjects + 1)
    np.random.shuffle(pool)
    return np.sum([subject_ids == s for s in pool[0:n_in_bag]], axis=0) != 0


if test:
    # train the model: one copy of the base model per bag of subjects
    all_models = []
    for k in range(nbags):
        print("Train Bag #%d/%d" % (k+1, nbags))
        model = deepcopy(model_base)
        ix_subjects = _draw_subject_bag(subjects, bagsize)

        model.mdlNr = k
        model.fit(dataTrain[ix_subjects], labels[ix_subjects])
        all_models.append(model)
    dataTrain = None

    # load test data
    preds_test = OrderedDict()
    for f in files:
        loadPredictions(preds_test, f[0], f[1], test=True)
    dataTest = aggr(preds_test)
    preds_test = None
    # switch to add subjects
    if addSubjectID:
        dataTest = np.c_[dataTest, subjects_test]

    # get predictions: average of the bagged models
    # NOTE(review): 6 is presumably len(cols) -- confirm before changing.
    p = np.zeros((len(ids), 6))
    for k in range(nbags):
        print("Test Bag #%d" % (k+1))
        # pop so each model is released as soon as it has been used
        model = all_models.pop(0)
        p += model.predict_proba(dataTest) / nbags
    # p is wrapped in a list, so the saved array has a leading axis of size 1
    np.save('test/test_%s.npy' % fileName, [p])
else:
    auc_tot = []
    p = np.zeros(labels.shape)
    cv = LeaveOneLabelOut(series)
    # train_idx/test_idx avoid overwriting the module-level `test` mode flag
    for fold, (train_idx, test_idx) in enumerate(cv):
        for k in range(nbags):
            print("Train Bag #%d/%d" % (k+1, nbags))
            ix_subjects = _draw_subject_bag(subjects[train_idx], bagsize)
            model = deepcopy(model_base)
            model.mdlNr = k
            bag_rows = train_idx[ix_subjects]
            if modelName == 'NeuralNet':
                # the neural net also receives the held-out fold
                model.fit(dataTrain[bag_rows], labels[bag_rows],
                          dataTrain[test_idx], labels[test_idx])
            else:
                model.fit(dataTrain[bag_rows], labels[bag_rows])
            p[test_idx] += model.predict_proba(dataTrain[test_idx]) / nbags
            # running score of the partially accumulated average
            auc = [roc_auc_score(labels[test_idx][:, col], p[test_idx][:, col])
                   for col in allCols]
            print(np.mean(auc))
        # after the last bag, `auc` holds the score of the full average
        auc_tot.append(np.mean(auc))
        print('Fold %d, score: %.5f' % (fold, auc_tot[-1]))
    print('AUC: %.5f' % np.mean(auc_tot))
    np.save('val/val_%s.npy' % fileName, [p])