Chemotherapy-toxicty-pred / Git / Diff of /XgBoost code

Models:

joseph-gordon/

Chemotherapy-toxicty-pred

Downloads: 1

Diff of /XgBoost code [000000] .. [16f085]

Switch to unified view

 b/XgBoost code
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Feb 26 15:36:18 2023
+@author: ming
+"""
+#import packages
+from sklearn.metrics import precision_score
+import xgboost as xgb
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from matplotlib.colors import ListedColormap
+from sklearn.preprocessing import LabelEncoder
+from sklearn.metrics import classification_report
+from sklearn.model_selection import GridSearchCV #网格搜索
+import matplotlib.pyplot as plt#可视化
+import seaborn as sns#绘图包
+from sklearn.model_selection import train_test_split
+# import data
+X=pd.read_csv(r"/Users/ming/Downloads/Xset3.csv")
+Y=pd.read_csv(r"/Users/ming/Downloads/Y.csv")
+from imblearn.over_sampling import RandomOverSampler
+ros = RandomOverSampler(random_state=0)
+X_resampled, y_resampled = ros.fit_resample(X, Y)
+X_resampled.info()
+# split data
+trainX, testX, trainY, testY = train_test_split(X_resampled, y_resampled, random_state=3, test_size=0.2)
+model=xgb.XGBClassifier()
+# 训练模型
+model.fit(trainX,trainY)
+# 预测值
+y_pred = model.predict(testX)
+#评估指标
+# 求出预测和真实一样的数目
+true = np.sum(y_pred == testY.values.tolist())
+print('the right number of prediction：', true)
+print('预测错的的结果数目为：', testY.shape[0]-true)
+# 评估指标
+from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,cohen_kappa_score
+print('预测数据的准确率为： {:.4}%'.format(accuracy_score(testY,y_pred)*100))
+print('预测数据的精确率为：{:.4}%'.format(
+      precision_score(testY,y_pred)*100))
+print('预测数据的召回率为：{:.4}%'.format(
+      recall_score(testY,y_pred)*100))
+# print("训练数据的F1值为：", f1score_train)
+print('预测数据的F1值为：',
+      f1_score(testY,y_pred))
+print('预测数据的Cohen’s Kappa系数为：',
+      cohen_kappa_score(testY,y_pred))
+# 打印分类报告
+print('the evaluation report：',
+      classification_report(testY,y_pred))
+from xgboost import plot_importance
+# plt.figure(figsize=(15,15))
+plt.rcParams["figure.figsize"] = (14, 8)
+plot_importance(model)
+# ROC曲线、AUC
+from sklearn.metrics import precision_recall_curve
+from sklearn import metrics
+# 预测正例的概率
+y_pred_prob=model.predict_proba(testX)[:,1]
+# y_pred_prob ,返回两列，第一列代表类别0,第二列代表类别1的概率
+fpr, tpr, thresholds = metrics.roc_curve(testY,y_pred_prob, pos_label=1)
+#pos_label，代表真阳性标签，就是说是分类里面的好的标签，这个要看你的特征目标标签是0,1，还是1,2
+roc_auc = metrics.auc(fpr, tpr)  #auc为Roc曲线下的面积
+# print(roc_auc)
+plt.figure(figsize=(8,6))
+plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
+plt.plot(fpr, tpr, 'r',label='AUC = %0.2f'% roc_auc)
+plt.legend(loc='lower right')
+# plt.plot([0, 1], [0, 1], 'r--')
+plt.xlim([0, 1.1])
+plt.ylim([0, 1.1])
+plt.xlabel('False Positive Rate') #横坐标是fpr
+plt.ylabel('True Positive Rate')  #纵坐标是tpr
+plt.title('Receiver operating characteristic example')
+plt.show()
+y_pred_prob1=model.predict_proba(X)[:,1]
+y_pred1=model.predict(X)
+#5-fold cross validation
+from sklearn.model_selection import cross_val_score
+scores = cross_val_score(model, X_resampled, y_resampled, cv=10)
+print("%0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))