|
a |
|
b/model.py |
|
|
1 |
from sklearn.svm import SVC |
|
|
2 |
import pandas as pd |
|
|
3 |
from sklearn.model_selection import train_test_split |
|
|
4 |
import joblib |
|
|
5 |
from sklearn.ensemble import RandomForestClassifier |
|
|
6 |
import os |
|
|
7 |
from sklearn.metrics import log_loss, ConfusionMatrixDisplay, average_precision_score, accuracy_score |
|
|
8 |
from sklearn.metrics import roc_curve, precision_recall_curve, auc, f1_score, confusion_matrix |
|
|
9 |
import matplotlib.pyplot as plt |
|
|
10 |
import numpy as np |
|
|
11 |
import pickle |
|
|
12 |
#def read_data(path): |
|
|
13 |
# data = pd.read_csv(path) |
|
|
14 |
#data.set_index("ID_REF",inplace = True) |
|
|
15 |
#labels = data.pop("Result") |
|
|
16 |
# return data, labels |
|
|
17 |
# Train a random-forest classifier on the gene-expression table, persist it
# with pickle, reload it, and print the prediction for one hard-coded sample.

# Load the expression table and use the ID_REF column as the row index.
dataset = pd.read_csv('GenesExp1.csv')
dataset.set_index("ID_REF", inplace=True)

# Features are the first 20 columns; the target is the last column.
X = dataset.iloc[:, :20]
y = dataset.iloc[:, -1]

# NOTE(review): path_model is not referenced anywhere in the visible script;
# kept because other code may read it -- confirm before removing.
path_model = "F:/GeneModel/"

# Hold out 40% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.40, random_state=1
)

model = RandomForestClassifier(
    criterion='gini',
    max_depth=6,
    min_samples_leaf=1,
    min_samples_split=2,
    n_estimators=100,
)
model.fit(X_train, y_train)

# NOTE(review): y_pred is computed but never scored in the visible script.
y_pred = model.predict(X_test)

# Use context managers so the file handles are closed (and the write is
# flushed) before the pickle is read back.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

# Only unpickle files this script produced itself; pickle.load can execute
# arbitrary code when given untrusted data.
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# One sample with 20 expression values, labelled with the training columns so
# the prediction input carries the same feature names the model was fitted on.
sample = pd.DataFrame(
    [[
        11.27285579, 13.11888698, 13.04983865, 7.160173909, 11.84600012,
        11.38408063, 12.46225539, 10.35803641, 10.43634604, 10.31537082,
        8.195574032, 11.00985731, 9.804574801, 7.811523898, 9.271842845,
        8.808279933, 8.473070081, 8.818380484, 9.115116886, 9.315489635,
    ]],
    columns=X.columns,
)
print(model.predict(sample))