|
a |
|
b/Lung_cancer.py |
|
|
1 |
import numpy as np # importing numpy for numerical calculations |
|
|
2 |
|
|
|
3 |
import pandas as pd # importing pandas for creating data frames |
|
|
4 |
from sklearn.model_selection import train_test_split # train_tets_split for spliting the data into training an testing |
|
|
5 |
from sklearn.linear_model import LogisticRegression # for logistic resgression |
|
|
6 |
from sklearn.ensemble import RandomForestClassifier # for random forest classifierfrom sklearn.ensemble import GradientBoostingClassifier # For gradienboosting classifier |
|
|
7 |
from sklearn.metrics import accuracy_score # importing metrics for measuring accuracy |
|
|
8 |
from sklearn.metrics import mean_squared_error # for calculating mean squre errors |
|
|
9 |
|
|
|
10 |
# Load the raw records; the path is resolved relative to the working directory.
df = pd.read_csv("lung_cancer_examples.csv")

# Preview a few random rows. `DataFrame.sample` has to be *called*: printing
# the bare attribute only shows the bound-method repr, not any data.
print(df.sample(n=min(5, len(df))))

# Features: every column except the two name columns and the target.
# `drop` is a method, so it needs call parentheses; subscripting it with
# `[[...]]` raises "TypeError: 'method' object is not subscriptable".
X = df.drop(columns=['Name', 'Surname', 'Result'])

# Target: selected by name so it is always the same column that was excluded
# from X, regardless of where it sits in the CSV.
y = df['Result']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs. `train_test_split` is already imported with the
# other sklearn imports at the top of the file.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=10)
|
|
18 |
|
|
|
19 |
class logistic_regression:
    """Train a scikit-learn ``LogisticRegression`` and print evaluation metrics.

    The evaluation data is not passed in: the methods read the module-level
    ``X_test`` / ``y_test`` hold-out split, so those names must exist before
    any method is called. ``logistic`` must run first because it creates
    ``self.y_pred_lr``, which the other two methods read.
    """

    def logistic(self, X_train, y_train):
        """Fit the model on the training split and print its test accuracy.

        Args:
            X_train: Training feature matrix.
            y_train: Training labels.

        Returns:
            None. The accuracy, as a percentage, is printed.
        """
        from sklearn.linear_model import LogisticRegression

        # Keep the fitted estimator on the instance so it stays available
        # after training instead of being discarded with the local scope.
        self.model_lr = LogisticRegression()
        self.model_lr.fit(X_train, y_train)

        # Predictions on the hold-out features are cached for the metric
        # methods below.
        self.y_pred_lr = self.model_lr.predict(X_test)

        # accuracy_score compares the true labels with the predictions.
        print("Logistic Regression model Accuracy :", accuracy_score(y_test, self.y_pred_lr)*100, "%")

    def mean_absolute_error(self):
        """Print the mean absolute error of the cached predictions vs ``y_test``.

        Returns:
            None. The value is printed.
        """
        # Absolute (not squared) differences, so the value matches the
        # metric's name; the two coincide only when every error is 0 or +/-1.
        print("Mean Absoluter error of logistic Regression :", np.abs(y_test - self.y_pred_lr).mean())

    def variance_bias(self):
        """Print the prediction variance and the derived "bias" figure.

        Returns:
            None. Both values are printed.
        """
        variance = np.var(self.y_pred_lr)  # spread of the predicted labels
        print("Variance of LogisticRegression model is :", variance)

        # Mean squared gap between the average prediction and each true label.
        mean_sq_gap = np.mean((np.mean(self.y_pred_lr) - y_test) ** 2)

        # NOTE(review): "bias" here is that gap minus the prediction variance.
        # This is not the textbook bias-variance decomposition; the formula is
        # kept as written -- confirm the intended definition.
        bias = mean_sq_gap - variance
        print("Bias of LogisticRegression model is :", bias)
|
|
39 |
|
|
|
40 |
class gradient_boosting:
    """Train a scikit-learn ``GradientBoostingClassifier`` and print metrics.

    The evaluation data is not passed in: the methods read the module-level
    ``X_test`` / ``y_test`` hold-out split, so those names must exist before
    any method is called. ``gb`` must run first because it creates
    ``self.y_pred_gbc``, which the other two methods read.
    """

    def gb(self, X_train, y_train):
        """Fit the model on the training split and print its test accuracy.

        Args:
            X_train: Training feature matrix.
            y_train: Training labels.

        Returns:
            None. The accuracy, as a percentage, is printed.
        """
        from sklearn.ensemble import GradientBoostingClassifier

        # Keep the fitted estimator on the instance so it stays available
        # after training instead of being discarded with the local scope.
        self.model_gbc = GradientBoostingClassifier()
        self.model_gbc.fit(X_train, y_train)

        # Predictions on the hold-out features are cached for the metric
        # methods below.
        self.y_pred_gbc = self.model_gbc.predict(X_test)

        # The labels below name this model; the earlier text was copied from
        # the logistic-regression class and misreported which model ran.
        print("Gradient Boosting model Accuracy :", accuracy_score(y_test, self.y_pred_gbc)*100, "%")

    def mean_absolute_error(self):
        """Print the mean absolute error of the cached predictions vs ``y_test``.

        Returns:
            None. The value is printed.
        """
        # Absolute (not squared) differences, so the value matches the
        # metric's name; the two coincide only when every error is 0 or +/-1.
        print("Mean Absoluter error of Gradient Boosting :", np.abs(y_test - self.y_pred_gbc).mean())

    def variance_bias(self):
        """Print the prediction variance and the derived "bias" figure.

        Returns:
            None. Both values are printed.
        """
        variance = np.var(self.y_pred_gbc)  # spread of the predicted labels
        print("Variance of GradientBoosting model is :", variance)

        # Mean squared gap between the average prediction and each true label.
        mean_sq_gap = np.mean((np.mean(self.y_pred_gbc) - y_test) ** 2)

        # NOTE(review): "bias" here is that gap minus the prediction variance.
        # This is not the textbook bias-variance decomposition; the formula is
        # kept as written -- confirm the intended definition.
        bias = mean_sq_gap - variance
        print("Bias of GradientBoosting model is :", bias)
|
|
60 |
|
|
|
61 |
class random_forest_classifier:
    """Train a scikit-learn ``RandomForestClassifier`` and print metrics.

    The evaluation data is not passed in: the methods read the module-level
    ``X_test`` / ``y_test`` hold-out split, so those names must exist before
    any method is called. ``random_forest`` must run first because it creates
    ``self.y_pred_rf``, which the other two methods read.
    """

    def random_forest(self, X_train, y_train):
        """Fit the model and print its test mean squared error and accuracy.

        Args:
            X_train: Training feature matrix.
            y_train: Training labels.

        Returns:
            None. Both metrics are printed.
        """
        from sklearn.ensemble import RandomForestClassifier

        self.model_rf = RandomForestClassifier()
        self.model_rf.fit(X_train, y_train)

        # Predictions on the hold-out features are cached for the metric
        # methods below.
        self.y_pred_rf = self.model_rf.predict(X_test)

        print("Mean square error for random forest model: ", mean_squared_error(y_test, self.y_pred_rf))
        # accuracy_score compares the true labels with the predictions.
        print("Random Forest model accuracy :", accuracy_score(y_test, self.y_pred_rf)*100, "%")

    def mean_absolute_error(self):
        """Print the mean absolute error of the cached predictions vs ``y_test``.

        Returns:
            None. The value is printed.
        """
        # Absolute (not squared) differences, so the value matches the
        # metric's name; the squared version is already reported by
        # ``random_forest`` as the mean squared error.
        print("Mean Absoluter error of Random Forest :", np.abs(y_test - self.y_pred_rf).mean())

    def variance_bias(self):
        """Print the prediction variance and the derived "bias" figure.

        Returns:
            None. Both values are printed.
        """
        variance = np.var(self.y_pred_rf)  # spread of the predicted labels
        print("Variance of RandomForest model is :", variance)

        # Mean squared gap between the average prediction and each true label.
        mean_sq_gap = np.mean((np.mean(self.y_pred_rf) - y_test) ** 2)

        # NOTE(review): "bias" here is that gap minus the prediction variance.
        # This is not the textbook bias-variance decomposition; the formula is
        # kept as written -- confirm the intended definition.
        bias = mean_sq_gap - variance
        print("Bias of RandomForest model is :", bias)
|
|
81 |
|
|
|
82 |
# Driver: train each model on the shared training split and report its metrics.
# The reporting methods print their own results and return None, so they are
# called directly; wrapping them in print() only added a stray "None" line
# after every metric.

print("-------LUNG CANCER PREDICTION USING LOGISTIC REGRESSION--------")
logistic = logistic_regression()
logistic.logistic(X_train, y_train)  # fits the model and prints its accuracy
logistic.mean_absolute_error()       # prints the mean absolute error
logistic.variance_bias()             # prints the variance and bias figures

print("-------LUNG CANCER PREDICTION USING GRADIENT BOOSTING CLASSIFIER--------")
gbc = gradient_boosting()
gbc.gb(X_train, y_train)             # fits the model and prints its accuracy
gbc.mean_absolute_error()            # prints the mean absolute error
gbc.variance_bias()                  # prints the variance and bias figures

print("-------LUNG CANCER PREDICTION USING RANDOM FOREST CLASSIFIER--------")
rf_classifier = random_forest_classifier()
rf_classifier.random_forest(X_train, y_train)  # fits, then prints MSE and accuracy
rf_classifier.mean_absolute_error()            # prints the mean absolute error
rf_classifier.variance_bias()                  # prints the variance and bias figures