b/ Random Forest code (Python)
import pandas as pd
from fancyimpute import KNN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn import preprocessing

# load data
X = pd.read_csv(r"/Users/ming/Downloads/Xset.csv")
Y = pd.read_csv(r"/Users/ming/Downloads/Y.csv")

# drop columns with fewer than 100 non-missing values
X = X.dropna(axis=1, thresh=100)

# impute the remaining missing values with k-nearest neighbours;
# fancyimpute returns a bare array, so the column names are saved and restored
Xc = X.columns
X = pd.DataFrame(KNN(k=6).fit_transform(X))
X.columns = Xc

# standardize features to zero mean and unit variance
standardized_X = preprocessing.scale(X)

# candidate classifiers
models = {
    "knn": KNeighborsClassifier(n_neighbors=1),
    "naive_bayes": GaussianNB(),
    "logit": LogisticRegression(solver="lbfgs", multi_class="auto"),
    "svm": SVC(kernel="rbf", gamma="auto"),
    "decision_tree": DecisionTreeClassifier(),
    "random_forest": RandomForestClassifier(n_estimators=100),
    "mlp": MLPClassifier()
}

# 80/20 train/test split
trainX, testX, trainY, testY = train_test_split(standardized_X, Y, random_state=3, test_size=0.2)

print("use '{}' to build model...".format("random_forest"))
model = models["random_forest"]
model.fit(trainX, trainY.values.ravel())

# pair each feature name with its importance score
features_importance = pd.DataFrame({"feature": Xc, "importance": model.feature_importances_})

# prediction and evaluation
print("evaluation...")
predictions = model.predict(testX)
print(classification_report(testY, predictions))
auc_score = roc_auc_score(testY, predictions)  # AUC from hard labels; predict_proba scores would give a smoother estimate
print('CONFUSION MATRIX')
print(confusion_matrix(testY, predictions))
print("AUC")
print(auc_score)
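
The models dictionary above defines seven candidate classifiers, although only "random_forest" is fitted in this listing. As a minimal sketch (assuming the same trainX/testX/trainY/testY split produced above), the remaining entries could be compared in a single loop:

# sketch: fit each classifier in the dictionary and report its accuracy on the held-out set
from sklearn.metrics import accuracy_score

for name, clf in models.items():
    clf.fit(trainX, trainY.values.ravel())
    acc = accuracy_score(testY.values.ravel(), clf.predict(testX))
    print("{}: accuracy = {:.3f}".format(name, acc))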