
Random Forest code (Python)
import pandas as pd
from fancyimpute import KNN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn import preprocessing

# load data
X = pd.read_csv(r"/Users/ming/Downloads/Xset.csv")
Y = pd.read_csv(r"/Users/ming/Downloads/Y.csv")

# del columns: keep only columns with at least 100 non-missing values
# (assign the result back, otherwise dropna has no effect)
X = X.dropna(axis=1, thresh=100)

# impute remaining missing values with k-nearest neighbours
Xc = X.columns
X = pd.DataFrame(KNN(k=6).fit_transform(X))
X.columns = Xc
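# If fancyimpute is not installed, scikit-learn's KNNImputer (available in
# scikit-learn >= 0.22) is a close stand-in; a minimal sketch, not part of the
# original script:
# from sklearn.impute import KNNImputer
# X = pd.DataFrame(KNNImputer(n_neighbors=6).fit_transform(X), columns=Xc)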

# standardize features to zero mean and unit variance
standardized_X = preprocessing.scale(X)

models = {
    "knn": KNeighborsClassifier(n_neighbors=1),
    "naive_bayes": GaussianNB(),
    "logit": LogisticRegression(solver="lbfgs", multi_class="auto"),
    "svm": SVC(kernel="rbf", gamma="auto"),
    "decision_tree": DecisionTreeClassifier(),
    "random_forest": RandomForestClassifier(n_estimators=100),
    "mlp": MLPClassifier()
}
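# The dict above also makes it easy to benchmark every classifier, not just the
# random forest; a rough sketch (assumption, run after the train/test split below):
# for name, m in models.items():
#     m.fit(trainX, trainY)
#     print(name, m.score(testX, testY))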

# split into train/test sets; flatten Y to a 1-D array to avoid shape warnings
trainX, testX, trainY, testY = train_test_split(standardized_X, Y.values.ravel(), random_state=3, test_size=0.2)
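# Note: preprocessing.scale above uses statistics from the full dataset; a
# leakage-free sketch (an assumption, not the original behaviour) would split
# the unscaled X first and fit the scaler on the training rows only:
# from sklearn.preprocessing import StandardScaler
# trainX, testX, trainY, testY = train_test_split(X, Y.values.ravel(), random_state=3, test_size=0.2)
# scaler = StandardScaler().fit(trainX)
# trainX, testX = scaler.transform(trainX), scaler.transform(testX)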

print("use '{}' to build model...".format("random_forest"))
model = models["random_forest"]
model.fit(trainX, trainY)
features_importance = pd.DataFrame({"feature": Xc, "importance": model.feature_importances_})
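# One way to inspect the strongest predictors (sketch):
# print(features_importance.sort_values("importance", ascending=False).head(10))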

## prediction and evaluation
print("evaluation...")
predictions = model.predict(testX)
print(classification_report(testY, predictions))
auc_score = roc_auc_score(testY, predictions)
print('CONFUSION MATRIX')
print(confusion_matrix(testY, predictions))
print("AUC")
print(auc_score)
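# roc_auc_score above is computed from hard class predictions; for a binary
# target, predicted probabilities usually give a more informative AUC (sketch):
# print(roc_auc_score(testY, model.predict_proba(testX)[:, 1]))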