Random Forest code (Python)


import pandas as pd
from fancyimpute import KNN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn import preprocessing

#load data
X=pd.read_csv(r"/Users/ming/Downloads/Xset.csv")
Y=pd.read_csv(r"/Users/ming/Downloads/Y.csv")

# drop sparse columns: keep only columns with at least 100 non-missing values
X = X.dropna(axis=1, thresh=100)

# impute the remaining missing values with KNN (k=6), preserving column names
Xc = X.columns
X = pd.DataFrame(KNN(k=6).fit_transform(X))
X.columns = Xc

#standardize
standardized_X = preprocessing.scale(X)


models = {
    "knn": KNeighborsClassifier(n_neighbors=1),
    "naive_bayes": GaussianNB(),
    "logit": LogisticRegression(solver="lbfgs", multi_class="auto"),
    "svm": SVC(kernel="rbf", gamma="auto"),
    "decision_tree": DecisionTreeClassifier(),
    "random_forest": RandomForestClassifier(n_estimators=100),
    "mlp": MLPClassifier()
}

trainX, testX, trainY, testY = train_test_split(standardized_X, Y, random_state=3, test_size=0.2)
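# Note: preprocessing.scale above was fit on the full matrix, so test-set
# statistics leak into the training data. A stricter variant (a sketch, used
# in place of preprocessing.scale, with the same variable names) would be:
#   from sklearn.preprocessing import StandardScaler
#   scaler = StandardScaler().fit(trainX)
#   trainX, testX = scaler.transform(trainX), scaler.transform(testX)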

print("use '{}' bulid model...".format("random_forest"))
model = models["random_forest"]
model.fit(trainX, trainY.values.ravel())
features_importance = pd.DataFrame({"feature": Xc, "importance": model.feature_importances_})
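# features_importance is never displayed below; an optional, minimal way to
# inspect it is to print the ten most informative features:
print(features_importance.sort_values("importance", ascending=False).head(10))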

## prediction and evaluation
print("evaluation...")
predictions = model.predict(testX)
print(classification_report(testY, predictions))
print('CONFUSION MATRIX')
print(confusion_matrix(testY, predictions))
# ROC AUC from the predicted class probabilities (assumes a binary target)
auc_score = roc_auc_score(testY, model.predict_proba(testX)[:, 1])
print("AUC")
print(auc_score)
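
# The models dict above defines several other classifiers that the script never
# fits. A minimal sketch for comparing them on the same split (a hypothetical
# extension, not part of the original analysis):
for name, clf in models.items():
    clf.fit(trainX, trainY.values.ravel())
    acc = clf.score(testX, testY.values.ravel())
    print("{}: accuracy = {:.3f}".format(name, acc))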