[6b894f]: / RandomForest.py

Download this file

101 lines (85 with data), 3.2 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
import pandas as pd
import matplotlib as plt
import numpy as np
from sklearn import linear_model
#from sklearn.model_selection cross_validation
from scipy.stats import norm
from sklearn.svm import SVC
from sklearn import svm
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from random import seed
from random import randrange
from csv import reader
import csv
import numpy as np
import pandas as pd
from pandas import read_csv
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
def _write_predictions(y_pred, out_path):
    """Write predictions to *out_path* as ``ID,Predicted Value`` rows.

    The ID is the 1-based position of the prediction in *y_pred*; it is not
    the row index of the sample in the source dataset.
    """
    with open(out_path, "w") as out:
        out.write("ID,Predicted Value\n")
        out.writelines(
            str(idx) + "," + str(pred) + "\n"
            for idx, pred in enumerate(y_pred, start=1)
        )


def _write_metrics(metrics, out_path):
    """Write ``(name, value)`` pairs to *out_path* as ``Parameter,Value`` rows."""
    with open(out_path, "w") as out:
        out.write("Parameter,Value\n")
        out.writelines(name + "," + str(value) + "\n" for name, value in metrics)


def _plot_metrics(metrics, out_path):
    """Draw a bar chart of *metrics*, save it to *out_path*, and show it briefly."""
    names = [name for name, _ in metrics]
    values = [value for _, value in metrics]
    colors = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#8c564b"]

    fig = plt.figure()
    plt.bar(names, values, color=colors)
    plt.xlabel('Parameter')
    plt.ylabel('Value')
    plt.title(' Random Forest Metrics Value')
    fig.savefig(out_path)
    # Keep the window visible for a few seconds without blocking the caller.
    plt.pause(5)
    plt.show(block=False)
    plt.close(fig)


def process(path) -> None:
    """Train a random forest on the CSV at *path* and report its test metrics.

    The CSV must contain the columns ``Diagnosis`` (target), ``Genero`` and
    the numeric feature columns listed in ``feature_columns`` below. The data
    is split 70/30 (``random_state=42``), a ``RandomForestClassifier`` with
    default settings is fitted, and the following files are written:

    * ``results/resultRF.csv``      - one predicted label per test sample
    * ``results/RFMetrics.csv``     - MSE, MAE, R-squared, RMSE and accuracy (%)
    * ``results/RFMetricsValue.png`` - bar chart of the metrics above

    The same information is also printed to stdout, and the chart is shown
    on screen for about five seconds.

    Args:
        path: Location of the input CSV, passed straight to ``pandas.read_csv``.
    """
    import os

    data = pd.read_csv(path)
    print("data.columns=", data.columns)

    # Encode the categorical columns as integers. Separate encoders are used
    # so each one keeps the class mapping of the column it was fitted on.
    data['Diagnosis'] = preprocessing.LabelEncoder().fit_transform(data['Diagnosis'])
    data['Gen'] = preprocessing.LabelEncoder().fit_transform(data['Genero'])

    feature_columns = [
        'Age', 'Weight (Kg)', 'Height (cms)', 'Gen', 'Heart Rate',
        'oxygen saturation', 'Respiratory Rate', 'Systolic Blood Pressure',
        'Diastolic Blood Pressure', 'Mean Blood Pressure',
    ]
    X = data[feature_columns]
    y = data['Diagnosis']
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    model2 = RandomForestClassifier()
    model2.fit(X_train, y_train)
    y_pred = model2.predict(X_test)
    print("predicted")
    print(y_pred)
    print(y_test)

    # The output directory may not exist on a fresh checkout.
    os.makedirs("results", exist_ok=True)
    _write_predictions(y_pred, "results/resultRF.csv")

    # NOTE(review): MSE/MAE/R-squared are computed on label-encoded class ids,
    # so they are only meaningful if that encoding is ordinal - confirm.
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    rms = np.sqrt(mse)
    # Report the measured accuracy as-is. The previous code shaved 0.001 off
    # a perfect score (and a further 0.001 in the printed value only), so the
    # console and the CSV disagreed and neither showed the real result.
    ac = accuracy_score(y_test, y_pred)

    print("---------------------------------------------------------")
    print("MSE VALUE FOR RandomForest IS %f " % mse)
    print("MAE VALUE FOR RandomForest IS %f " % mae)
    print("R-SQUARED VALUE FOR RandomForest IS %f " % r2)
    print("RMSE VALUE FOR RandomForest IS %f " % rms)
    print("ACCURACY VALUE RandomForest IS %f" % (ac * 100))
    print("---------------------------------------------------------")

    metrics = [
        ("MSE", mse),
        ("MAE", mae),
        ("R-SQUARED", r2),
        ("RMSE", rms),
        ("ACCURACY", ac * 100),
    ]
    _write_metrics(metrics, 'results/RFMetrics.csv')
    _plot_metrics(metrics, 'results/RFMetricsValue.png')
#process("Child_Heart_Stage_dataset.csv")