b/lvl2/genEns.py
+# -*- coding: utf-8 -*-
+"""
+Created on Sat Aug 15 14:12:12 2015
+@author: rc, alex
+"""
+import os
+import sys
+if __name__ == '__main__' and __package__ is None:
+    filePath = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    sys.path.append(filePath)
+import numpy as np
+import yaml
+from copy import deepcopy
+from collections import OrderedDict
+from sklearn.metrics import roc_auc_score
+from sklearn.cross_validation import LeaveOneLabelOut
+from preprocessing.aux import getEventNames
+from utils.ensembles import createEnsFunc, loadPredictions, getLvl1ModelList
+from ensembling.WeightedMean import WeightedMeanClassifier
+from ensembling.NeuralNet import NeuralNet
+from ensembling.XGB import XGB
+def _from_yaml_to_func(method, params):
+    """go from yaml to method.
+    Need to be here for accesing local variables.
+    """
+    prm = dict()
+    if params is not None:
+        for key, val in params.iteritems():
+            prm[key] = eval(str(val))
+    return eval(method)(**prm)
+# ## here read YAML and build models ###
+yml = yaml.load(open(sys.argv[1]))
+fileName = yml['Meta']['file']
+if 'subsample' in yml['Meta']:
+    subsample = yml['Meta']['subsample']
+else:
+    subsample = 1
+modelName, modelParams = yml['Model'].iteritems().next()
+model_base = _from_yaml_to_func(modelName, modelParams)
+ensemble = yml['Model'][modelName]['ensemble']
+addSubjectID = True if 'addSubjectID' in yml.keys() else False
+mode = sys.argv[2]
+if mode == 'val':
+    test = False
+elif mode == 'test':
+    test = True
+else:
+    raise('Invalid mode. Please specify either val or test')
+print('Running %s in mode %s, predictions will be saved as %s' % (modelName,mode,fileName))
+######
+cols = getEventNames()
+ids = np.load('../infos_test.npy')
+subjects_test = ids[:, 1]
+series_test = ids[:, 2]
+ids = ids[:, 0]
+labels = np.load('../infos_val.npy')
+subjects = labels[:, -2]
+series = labels[:, -1]
+labels = labels[:, :-2]
+allCols = range(len(cols))
+# ## loading predictions ###
+files = getLvl1ModelList()
+preds_val = OrderedDict()
+for f in files:
+    loadPredictions(preds_val, f[0], f[1])
+# validity check
+for m in ensemble:
+    assert(m in preds_val)
+# ## train/test ###
+aggr = createEnsFunc(ensemble)
+dataTrain = aggr(preds_val)
+preds_val = None
+# optionally adding subjectIDs
+if addSubjectID:
+    dataTrain = np.c_[dataTrain, subjects]
+np.random.seed(4234521)
+if test:
+    # train the model
+    model = deepcopy(model_base)
+    model.fit(dataTrain[::subsample], labels[::subsample])
+    dataTrain = None
+    # load test data
+    preds_test = OrderedDict()
+    for f in files:
+        loadPredictions(preds_test, f[0], f[1], test=True)
+    dataTest = aggr(preds_test)
+    preds_test = None
+    # switch to add subjects
+    if addSubjectID:
+        dataTest = np.c_[dataTest, subjects_test]
+    # get predictions
+    p = model.predict_proba(dataTest)
+    np.save('test/test_%s.npy' % fileName, [p])
+else:
+    auc_tot = []
+    p = np.zeros(labels.shape)
+    cv = LeaveOneLabelOut(series)
+    for fold, (train, test) in enumerate(cv):
+        model = deepcopy(model_base)
+        if modelName == 'NeuralNet':
+            # passing also test data to print out test error during training
+            model.fit(dataTrain[train], labels[train], dataTrain[test],
+                      labels[test])
+        else:
+            model.fit(dataTrain[train][::subsample], labels[train][::subsample])
+        p[test] = model.predict_proba(dataTrain[test])
+        auc = [roc_auc_score(labels[test][:, col], p[test][:, col])
+               for col in allCols]
+        auc_tot.append(np.mean(auc))
+        print('Fold %d, score: %.5f' % (fold, auc_tot[-1]))
+    print('AUC: %.5f' % np.mean(auc_tot))
+    np.save('val/val_%s.npy' % fileName, [p])