Diff of /XgBoost code [000000] .. [16f085]

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Feb 26 15:36:18 2023

@author: ming
"""

# import packages
from sklearn.metrics import precision_score

import xgboost as xgb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  # visualization
from matplotlib.colors import ListedColormap
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV  # grid search
import seaborn as sns  # plotting package
from sklearn.model_selection import train_test_split

# import data
X = pd.read_csv(r"/Users/ming/Downloads/Xset3.csv")
Y = pd.read_csv(r"/Users/ming/Downloads/Y.csv")

# balance the classes by random oversampling
from imblearn.over_sampling import RandomOverSampler
ros = RandomOverSampler(random_state=0)
X_resampled, y_resampled = ros.fit_resample(X, Y)
X_resampled.info()
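
# Added sketch (not in the original script): a quick sanity check that the
# oversampling produced a balanced label distribution before splitting.
# y_resampled may be a Series or a single-column DataFrame depending on the
# imbalanced-learn version, so it is flattened defensively first.
print(pd.Series(np.ravel(y_resampled)).value_counts())
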
# split data
trainX, testX, trainY, testY = train_test_split(X_resampled, y_resampled, random_state=3, test_size=0.2)

model = xgb.XGBClassifier()
# train the model
model.fit(trainX, trainY)
# predict on the test set
y_pred = model.predict(testX)
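
# Added sketch (not in the original script): GridSearchCV is imported above but
# never used; this shows one way the classifier could be tuned. The grid values
# below are illustrative assumptions, not recommended settings.
param_grid = {
    'n_estimators': [100, 300],
    'max_depth': [3, 6],
    'learning_rate': [0.05, 0.1],
}
grid = GridSearchCV(xgb.XGBClassifier(), param_grid, cv=5, scoring='f1')
grid.fit(trainX, np.ravel(trainY))
print('best parameters:', grid.best_params_)
# the tuned estimator could then replace `model`, e.g. model = grid.best_estimator_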

# evaluation metrics

# count how many predictions match the true labels
true = np.sum(y_pred == np.ravel(testY))
print('the right number of prediction:', true)
print('the number of wrong predictions:', testY.shape[0] - true)

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score
print('accuracy on the test data: {:.4}%'.format(accuracy_score(testY, y_pred) * 100))
print('precision on the test data: {:.4}%'.format(precision_score(testY, y_pred) * 100))
print('recall on the test data: {:.4}%'.format(recall_score(testY, y_pred) * 100))
# print("F1 score on the training data:", f1score_train)
print('F1 score on the test data:', f1_score(testY, y_pred))
print("Cohen's Kappa on the test data:", cohen_kappa_score(testY, y_pred))
# print the classification report
print('the evaluation report:', classification_report(testY, y_pred))
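
# Added sketch (not in the original script): the confusion matrix complements the
# right/wrong counts above by breaking them down per class.
from sklearn.metrics import confusion_matrix
print(confusion_matrix(np.ravel(testY), y_pred))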

# plot feature importance
from xgboost import plot_importance
# plt.figure(figsize=(15,15))
plt.rcParams["figure.figsize"] = (14, 8)
plot_importance(model)
plt.show()
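
# Added sketch (not in the original script): the same importances as a sorted
# table, which can be easier to read than the plot when there are many features.
# It assumes X_resampled (and therefore trainX) is a DataFrame, as implied by the
# call to X_resampled.info() above.
importances = pd.Series(model.feature_importances_, index=trainX.columns).sort_values(ascending=False)
print(importances.head(10))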

# ROC curve and AUC
from sklearn.metrics import precision_recall_curve
from sklearn import metrics
# predicted probability of the positive class
y_pred_prob = model.predict_proba(testX)[:, 1]
# predict_proba returns two columns: the first is the probability of class 0, the second of class 1
fpr, tpr, thresholds = metrics.roc_curve(testY, y_pred_prob, pos_label=1)
# pos_label is the label treated as positive; set it according to how the target is encoded (0/1 or 1/2)
roc_auc = metrics.auc(fpr, tpr)  # AUC is the area under the ROC curve
# print(roc_auc)
plt.figure(figsize=(8, 6))
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.plot(fpr, tpr, 'r', label='AUC = %0.2f' % roc_auc)
plt.legend(loc='lower right')
# plt.plot([0, 1], [0, 1], 'r--')
plt.xlim([0, 1.1])
plt.ylim([0, 1.1])
plt.xlabel('False Positive Rate')  # x axis is fpr
plt.ylabel('True Positive Rate')   # y axis is tpr
plt.title('Receiver operating characteristic example')
plt.show()
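
# Added sketch (not in the original script): precision_recall_curve is imported
# above but never used; this applies it to the same test-set probabilities.
precision, recall, pr_thresholds = precision_recall_curve(np.ravel(testY), y_pred_prob, pos_label=1)
plt.figure(figsize=(8, 6))
plt.plot(recall, precision, 'b')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall curve')
plt.show()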

# probabilities and predictions on the full original data set
y_pred_prob1 = model.predict_proba(X)[:, 1]
y_pred1 = model.predict(X)

# 10-fold cross-validation on the resampled data
from sklearn.model_selection import cross_val_score
scores = cross_val_score(model, X_resampled, y_resampled, cv=10)
print("%0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))
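
# Added sketch (an assumption, not part of the original script): because the
# oversampling above was applied before cross-validation, duplicated minority
# rows can land in both the training and validation folds. Wrapping the sampler
# and the classifier in an imbalanced-learn Pipeline keeps the resampling inside
# each fold instead.
from imblearn.pipeline import Pipeline as ImbPipeline
cv_pipeline = ImbPipeline([
    ('oversample', RandomOverSampler(random_state=0)),
    ('clf', xgb.XGBClassifier()),
])
cv_scores = cross_val_score(cv_pipeline, X, np.ravel(Y), cv=10)
print("%0.2f accuracy with a standard deviation of %0.2f (resampling inside each fold)"
      % (cv_scores.mean(), cv_scores.std()))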