[2d4573]: / mimic_icd9_coding / utils / classifier.py

Download this file

41 lines (31 with data), 1.3 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from __future__ import unicode_literals
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from collections import Counter
import pandas as pd
import numpy as np
class Classifier(object):
    """Multi-label classifier: one random forest fitted per output label.

    Wraps a ``RandomForestClassifier`` in a ``MultiOutputClassifier`` and
    exposes ``fit`` / ``predict``. ``predict_proba`` is deliberately disabled
    and raises ``NotImplementedError``.
    """

    def __init__(self):
        # One forest is cloned per label; the fixed seed keeps runs repeatable.
        forest = RandomForestClassifier(n_estimators=100, random_state=1)
        # n_jobs=-1 lets the per-label fits run in parallel on all cores.
        self.clf = MultiOutputClassifier(forest, n_jobs=-1)

    def fit(self, X, y):
        """Fit the underlying multi-output model on features X and labels y."""
        self.clf.fit(X, y)

    def predict(self, X):
        """Return the predicted labels for X as a NumPy array."""
        return np.array(self.clf.predict(X))

    def predict_proba(self, X):
        """Per-label class probabilities (currently disabled).

        Raises:
            NotImplementedError: always. The method is intentionally turned
                off; use ``predict`` instead.
        """
        # ``NotImplemented`` is the sentinel returned by binary special
        # methods, not an exception: raising it fails with a confusing
        # TypeError. ``NotImplementedError`` is the exception meant for this.
        raise NotImplementedError(
            "predict_proba is disabled for this classifier; use predict()"
        )

    @staticmethod
    def _stack_label_probabilities(proba):
        """Stack per-label probability arrays into one 2D array.

        Kept for when ``predict_proba`` is re-enabled, which would pass
        ``self.clf.predict_proba(X)`` here. The result has exactly two
        columns per label, so 109 labels give 109 * 2 columns.

        Args:
            proba: sequence with one array per label. Each array is
                n_samples x 2, except that it is sometimes n_samples x 1
                (presumably when a label had a single class at fit time --
                TODO confirm), in which case a zero column is appended.

        Returns:
            A 2D array of shape (n_samples, 2 * len(proba)).
        """
        blocks = []
        for label_proba in proba:
            if label_proba.shape[1] == 2:
                blocks.append(label_proba)
            else:
                # Pad the lone column so every label contributes two columns;
                # this includes the first label, which must not be skipped.
                blocks.append(
                    np.hstack((label_proba, np.zeros_like(label_proba)))
                )
        return np.hstack(blocks)