# Lung_cancer.py
import numpy as np  # numerical calculations
import pandas as pd  # data frames
from sklearn.ensemble import GradientBoostingClassifier  # gradient boosting classifier
from sklearn.ensemble import RandomForestClassifier  # random forest classifier
from sklearn.linear_model import LogisticRegression  # logistic regression
from sklearn.metrics import accuracy_score  # classification accuracy
from sklearn.metrics import mean_squared_error  # mean squared error
from sklearn.model_selection import train_test_split  # train/test splitting

# Load the dataset; the CSV is expected in the current working directory.
df = pd.read_csv("lung_cancer_examples.csv")

# Show a few random rows as a sanity check.  `df.sample` must be *called*;
# printing the bare attribute only shows the bound-method repr.
print(df.sample(n=min(5, len(df))))

# Features: every column except the two name columns and the label.
# `drop` is a method, so it is called with `columns=` (subscripting it with
# `[[...]]` raises TypeError).
X = df.drop(columns=['Name', 'Surname', 'Result'])
# Label: selected by name so it always matches the column dropped from X.
y = df['Result']

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=10)


class logistic_regression:
    """Train a scikit-learn ``LogisticRegression`` model and print its metrics.

    The methods read the module-level hold-out split ``X_test`` / ``y_test``.
    ``logistic`` must be called first: it stores the test-set predictions on
    the instance as ``y_pred_lr``, which the other two methods reuse.
    """

    def logistic(self, X_train, y_train):
        """Fit the model on the training data and print the test accuracy.

        Args:
            X_train: Training features passed to ``fit``.
            y_train: Training labels passed to ``fit``.

        Returns:
            None. The accuracy on ``X_test`` is printed as a percentage.
        """
        from sklearn.linear_model import LogisticRegression

        # Model is built with scikit-learn's default hyper-parameters.
        model_lr = LogisticRegression()
        model_lr.fit(X_train, y_train)

        # Keep the predictions so the error/variance reports can reuse them.
        self.y_pred_lr = model_lr.predict(X_test)

        # accuracy_score compares the actual labels with the predictions.
        print("Logistic Regression model Accuracy               :", accuracy_score(y_test, self.y_pred_lr)*100, "%")

    def mean_absolute_error(self):
        """Print the mean absolute error of the stored test predictions.

        Returns:
            None. The value is printed.
        """
        # Absolute (not squared) differences: squaring would report the mean
        # squared error under a "mean absolute error" label.
        print("Mean Absoluter error of logistic Regression     :", np.abs(y_test - self.y_pred_lr).mean())

    def variance_bias(self):
        """Print the variance of the predictions and the derived bias figure.

        The bias figure is this file's own definition: the mean squared
        deviation of ``y_test`` from the mean prediction, minus the variance
        of the predictions.

        Returns:
            None. Both values are printed.
        """
        # NOTE(review): this is not the textbook bias-variance decomposition;
        # the formula is kept as written - confirm the intended definition.
        Variance = np.var(self.y_pred_lr)
        print("Variance of LogisticRegression model is         :", Variance)

        # Mean (not sum) of squared deviations from the mean prediction.
        mean_sq_dev = np.mean((np.mean(self.y_pred_lr) - y_test) ** 2)
        Bias = mean_sq_dev - Variance
        print("Bias of LogisticRegression model is             :", Bias)


class gradient_boosting:
    """Train a scikit-learn ``GradientBoostingClassifier`` and print its metrics.

    The methods read the module-level hold-out split ``X_test`` / ``y_test``.
    ``gb`` must be called first: it stores the test-set predictions on the
    instance as ``y_pred_gbc``, which the other two methods reuse.
    """

    def gb(self, X_train, y_train):
        """Fit the model on the training data and print the test accuracy.

        Args:
            X_train: Training features passed to ``fit``.
            y_train: Training labels passed to ``fit``.

        Returns:
            None. The accuracy on ``X_test`` is printed as a percentage.
        """
        from sklearn.ensemble import GradientBoostingClassifier

        # Model is built with scikit-learn's default hyper-parameters.
        model_gbc = GradientBoostingClassifier()
        model_gbc.fit(X_train, y_train)

        # Keep the predictions so the error/variance reports can reuse them.
        self.y_pred_gbc = model_gbc.predict(X_test)

        # The labels name this model; they previously said "Logistic
        # Regression", which misreported which classifier produced the output.
        print("Gradient Boosting model Accuracy                 :", accuracy_score(y_test, self.y_pred_gbc)*100, "%")

    def mean_absolute_error(self):
        """Print the mean absolute error of the stored test predictions.

        Returns:
            None. The value is printed.
        """
        # Absolute (not squared) differences: squaring would report the mean
        # squared error under a "mean absolute error" label.
        print("Mean Absoluter error of Gradient Boosting       :", np.abs(y_test - self.y_pred_gbc).mean())

    def variance_bias(self):
        """Print the variance of the predictions and the derived bias figure.

        The bias figure is this file's own definition: the mean squared
        deviation of ``y_test`` from the mean prediction, minus the variance
        of the predictions.

        Returns:
            None. Both values are printed.
        """
        # NOTE(review): this is not the textbook bias-variance decomposition;
        # the formula is kept as written - confirm the intended definition.
        Variance = np.var(self.y_pred_gbc)
        print("Variance of GradientBoosting model is           :", Variance)

        # Mean (not sum) of squared deviations from the mean prediction.
        mean_sq_dev = np.mean((np.mean(self.y_pred_gbc) - y_test) ** 2)
        Bias = mean_sq_dev - Variance
        print("Bias of GradientBoosting model is               :", Bias)


class random_forest_classifier:
    """Train a scikit-learn ``RandomForestClassifier`` and print its metrics.

    The methods read the module-level hold-out split ``X_test`` / ``y_test``.
    ``random_forest`` must be called first: it stores the fitted model as
    ``model_rf`` and the test-set predictions as ``y_pred_rf``, which the
    other two methods reuse.
    """

    def random_forest(self, X_train, y_train):
        """Fit the model and print its test mean squared error and accuracy.

        Args:
            X_train: Training features passed to ``fit``.
            y_train: Training labels passed to ``fit``.

        Returns:
            None. Both metrics are printed.
        """
        from sklearn.ensemble import RandomForestClassifier

        # Model is built with scikit-learn's default hyper-parameters.
        self.model_rf = RandomForestClassifier()
        self.model_rf.fit(X_train, y_train)

        # Keep the predictions so the error/variance reports can reuse them.
        self.y_pred_rf = self.model_rf.predict(X_test)

        print("Mean square error for random forest model: ", mean_squared_error(y_test, self.y_pred_rf))
        # accuracy_score compares the actual labels with the predictions.
        print("Random Forest model accuracy              :", accuracy_score(y_test, self.y_pred_rf)*100, "%")

    def mean_absolute_error(self):
        """Print the mean absolute error of the stored test predictions.

        Returns:
            None. The value is printed.
        """
        # Absolute (not squared) differences: squaring would report the mean
        # squared error under a "mean absolute error" label.
        print("Mean Absoluter error of Random Forest     :", np.abs(y_test - self.y_pred_rf).mean())

    def variance_bias(self):
        """Print the variance of the predictions and the derived bias figure.

        The bias figure is this file's own definition: the mean squared
        deviation of ``y_test`` from the mean prediction, minus the variance
        of the predictions.

        Returns:
            None. Both values are printed.
        """
        # NOTE(review): this is not the textbook bias-variance decomposition;
        # the formula is kept as written - confirm the intended definition.
        Variance = np.var(self.y_pred_rf)
        print("Variance of RandomForest model is         :", Variance)

        # Mean (not sum) of squared deviations from the mean prediction.
        mean_sq_dev = np.mean((np.mean(self.y_pred_rf) - y_test) ** 2)
        Bias = mean_sq_dev - Variance
        print("Bias of RandomForest model is             :", Bias)


# Driver: train each model on the training split and print its reports.
# Every reporting method prints its own results and returns None, so the calls
# are made directly; wrapping them in print() only added a stray "None" line
# after each report.

print("-------LUNG CANCER PREDICTION USING LOGISTIC REGRESSION--------")
logistic = logistic_regression()
logistic.logistic(X_train, y_train)      # fit the model and print its accuracy
logistic.mean_absolute_error()           # print the mean absolute error
logistic.variance_bias()                 # print the variance and bias

print("-------LUNG CANCER PREDICTION USING GRADIENT BOOSTING CLASSIFIER--------")
gbc = gradient_boosting()
gbc.gb(X_train, y_train)                 # fit the model and print its accuracy
gbc.mean_absolute_error()                # print the mean absolute error
gbc.variance_bias()                      # print the variance and bias

print("-------LUNG CANCER PREDICTION USING RANDOM FOREST CLASSIFIER--------")
rf_classifier = random_forest_classifier()
rf_classifier.random_forest(X_train, y_train)  # fit the model and print MSE and accuracy
rf_classifier.mean_absolute_error()            # print the mean absolute error
rf_classifier.variance_bias()                  # print the variance and bias