# Source: RichardZick/LUNG-CANCER-PREDICTION-IN, Git revision [4db6e1]
# File: Lung_cancer.py (105 lines, 93 with data; 7.7 kB)

import numpy as np  # numerical calculations
import pandas as pd  # data frames

from sklearn.ensemble import GradientBoostingClassifier  # gradient boosting classifier
from sklearn.ensemble import RandomForestClassifier  # random forest classifier
from sklearn.linear_model import LogisticRegression  # logistic regression
from sklearn.metrics import accuracy_score  # classification accuracy
from sklearn.metrics import mean_squared_error  # mean squared error
from sklearn.model_selection import train_test_split  # split data into training and testing sets

# Load the dataset; the path is resolved relative to the current working directory.
df = pd.read_csv("lung_cancer_examples.csv")  # reading csv data
# Preview the first rows. `df.sample` without a call only printed the bound
# method's repr rather than a preview of the data.
print(df.head())

# Features: every column except the two name columns and the label.
# `drop` is a method, so it must be called; subscripting it raises TypeError.
X = df.drop(columns=['Name', 'Surname', 'Result'])
# Target: the last column -- presumably 'Result'; TODO confirm against the CSV.
y = df.iloc[:, -1]

from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=10)

class logistic_regression:
    """Train a scikit-learn ``LogisticRegression`` and report simple metrics.

    Call :meth:`logistic` first: it stores the predictions in ``y_pred_lr``
    together with the labels they are scored against, which the metric
    methods then read.
    """

    def logistic(self, X_train, y_train, X_eval=None, y_eval=None):
        """Fit the model, predict on the evaluation set and print the accuracy.

        Args:
            X_train: Training features.
            y_train: Training labels.
            X_eval: Features to predict on. Defaults to the module-level
                ``X_test`` split.
            y_eval: True labels for ``X_eval``. Defaults to the module-level
                ``y_test`` split.

        Returns:
            Accuracy on the evaluation set as a percentage (0-100).

        Raises:
            ValueError: If only one of ``X_eval`` / ``y_eval`` is supplied.
        """
        from sklearn.linear_model import LogisticRegression

        if (X_eval is None) != (y_eval is None):
            # Mixing a custom feature set with the global labels (or vice
            # versa) would silently score mismatched rows.
            raise ValueError("X_eval and y_eval must be given together")
        if X_eval is None:
            X_eval, y_eval = X_test, y_test

        self.model_lr = LogisticRegression()
        self.model_lr.fit(X_train, y_train)
        self.y_pred_lr = self.model_lr.predict(X_eval)
        # Remember the labels so the metric methods score against the same set.
        self._y_true = np.asarray(y_eval)

        accuracy = accuracy_score(self._y_true, self.y_pred_lr) * 100
        print("Logistic Regression model Accuracy               :", accuracy, "%")
        return accuracy

    def _true_labels(self, y_true=None):
        """Return the labels to score against as a NumPy array.

        Order of preference: the explicit argument, the labels recorded by
        :meth:`logistic`, then the module-level ``y_test``.
        """
        if y_true is None:
            y_true = getattr(self, "_y_true", None)
        if y_true is None:
            y_true = y_test
        return np.asarray(y_true)

    def mean_absolute_error(self, y_true=None):
        """Print and return the mean absolute error of the stored predictions.

        Args:
            y_true: Optional true labels; see :meth:`_true_labels` for the
                default.

        Returns:
            Mean of ``|y_true - y_pred_lr|``.
        """
        # Absolute (not squared) differences, matching the method's name.
        mae = np.abs(self._true_labels(y_true) - self.y_pred_lr).mean()
        print("Mean Absoluter error of logistic Regression     :", mae)
        return mae

    def variance_bias(self, y_true=None):
        """Print and return the script's variance and "bias" figures.

        ``variance`` is ``np.var`` of the predictions. ``bias`` is the mean
        squared deviation of the true labels from the mean prediction, minus
        that variance. NOTE(review): this is the script's own definition, not
        a textbook bias-variance decomposition -- confirm it is what is wanted.

        Args:
            y_true: Optional true labels; see :meth:`_true_labels` for the
                default.

        Returns:
            Tuple ``(variance, bias)``.
        """
        truth = self._true_labels(y_true)
        variance = np.var(self.y_pred_lr)
        print("Variance of LogisticRegression model is         :", variance)
        spread_about_mean = np.mean((np.mean(self.y_pred_lr) - truth) ** 2)
        bias = spread_about_mean - variance
        print("Bias of LogisticRegression model is             :", bias)
        return variance, bias

class gradient_boosting:
    """Train a scikit-learn ``GradientBoostingClassifier`` and report metrics.

    Call :meth:`gb` first: it stores the predictions in ``y_pred_gbc``
    together with the labels they are scored against, which the metric
    methods then read.
    """

    def gb(self, X_train, y_train, X_eval=None, y_eval=None, random_state=None):
        """Fit the model, predict on the evaluation set and print the accuracy.

        Args:
            X_train: Training features.
            y_train: Training labels.
            X_eval: Features to predict on. Defaults to the module-level
                ``X_test`` split.
            y_eval: True labels for ``X_eval``. Defaults to the module-level
                ``y_test`` split.
            random_state: Optional seed forwarded to the classifier so runs
                can be made reproducible; ``None`` keeps the library default.

        Returns:
            Accuracy on the evaluation set as a percentage (0-100).

        Raises:
            ValueError: If only one of ``X_eval`` / ``y_eval`` is supplied.
        """
        from sklearn.ensemble import GradientBoostingClassifier

        if (X_eval is None) != (y_eval is None):
            # Mixing a custom feature set with the global labels (or vice
            # versa) would silently score mismatched rows.
            raise ValueError("X_eval and y_eval must be given together")
        if X_eval is None:
            X_eval, y_eval = X_test, y_test

        self.model_gbc = GradientBoostingClassifier(random_state=random_state)
        self.model_gbc.fit(X_train, y_train)
        self.y_pred_gbc = self.model_gbc.predict(X_eval)
        # Remember the labels so the metric methods score against the same set.
        self._y_true = np.asarray(y_eval)

        accuracy = accuracy_score(self._y_true, self.y_pred_gbc) * 100
        # The labels below name this model; they previously said "Logistic Regression".
        print("Gradient Boosting model Accuracy                 :", accuracy, "%")
        return accuracy

    def _true_labels(self, y_true=None):
        """Return the labels to score against as a NumPy array.

        Order of preference: the explicit argument, the labels recorded by
        :meth:`gb`, then the module-level ``y_test``.
        """
        if y_true is None:
            y_true = getattr(self, "_y_true", None)
        if y_true is None:
            y_true = y_test
        return np.asarray(y_true)

    def mean_absolute_error(self, y_true=None):
        """Print and return the mean absolute error of the stored predictions.

        Args:
            y_true: Optional true labels; see :meth:`_true_labels` for the
                default.

        Returns:
            Mean of ``|y_true - y_pred_gbc|``.
        """
        # Absolute (not squared) differences, matching the method's name.
        mae = np.abs(self._true_labels(y_true) - self.y_pred_gbc).mean()
        print("Mean Absoluter error of Gradient Boosting        :", mae)
        return mae

    def variance_bias(self, y_true=None):
        """Print and return the script's variance and "bias" figures.

        ``variance`` is ``np.var`` of the predictions. ``bias`` is the mean
        squared deviation of the true labels from the mean prediction, minus
        that variance. NOTE(review): this is the script's own definition, not
        a textbook bias-variance decomposition -- confirm it is what is wanted.

        Args:
            y_true: Optional true labels; see :meth:`_true_labels` for the
                default.

        Returns:
            Tuple ``(variance, bias)``.
        """
        truth = self._true_labels(y_true)
        variance = np.var(self.y_pred_gbc)
        print("Variance of GradientBoosting model is            :", variance)
        spread_about_mean = np.mean((np.mean(self.y_pred_gbc) - truth) ** 2)
        bias = spread_about_mean - variance
        print("Bias of GradientBoosting model is                :", bias)
        return variance, bias

class random_forest_classifier:
    """Train a scikit-learn ``RandomForestClassifier`` and report metrics.

    Call :meth:`random_forest` first: it stores the fitted model in
    ``model_rf``, the predictions in ``y_pred_rf`` and the labels they are
    scored against, which the metric methods then read.
    """

    def random_forest(self, X_train, y_train, X_eval=None, y_eval=None, random_state=None):
        """Fit the model, predict on the evaluation set and print MSE and accuracy.

        Args:
            X_train: Training features.
            y_train: Training labels.
            X_eval: Features to predict on. Defaults to the module-level
                ``X_test`` split.
            y_eval: True labels for ``X_eval``. Defaults to the module-level
                ``y_test`` split.
            random_state: Optional seed forwarded to the classifier so runs
                can be made reproducible; ``None`` keeps the library default.

        Returns:
            Accuracy on the evaluation set as a percentage (0-100).

        Raises:
            ValueError: If only one of ``X_eval`` / ``y_eval`` is supplied.
        """
        from sklearn.ensemble import RandomForestClassifier

        if (X_eval is None) != (y_eval is None):
            # Mixing a custom feature set with the global labels (or vice
            # versa) would silently score mismatched rows.
            raise ValueError("X_eval and y_eval must be given together")
        if X_eval is None:
            X_eval, y_eval = X_test, y_test

        self.model_rf = RandomForestClassifier(random_state=random_state)
        self.model_rf.fit(X_train, y_train)
        self.y_pred_rf = self.model_rf.predict(X_eval)
        # Remember the labels so the metric methods score against the same set.
        self._y_true = np.asarray(y_eval)

        print("Mean square error for random forest model: ", mean_squared_error(self._y_true, self.y_pred_rf))
        accuracy = accuracy_score(self._y_true, self.y_pred_rf) * 100
        print("Random Forest model accuracy              :", accuracy, "%")
        return accuracy

    def _true_labels(self, y_true=None):
        """Return the labels to score against as a NumPy array.

        Order of preference: the explicit argument, the labels recorded by
        :meth:`random_forest`, then the module-level ``y_test``.
        """
        if y_true is None:
            y_true = getattr(self, "_y_true", None)
        if y_true is None:
            y_true = y_test
        return np.asarray(y_true)

    def mean_absolute_error(self, y_true=None):
        """Print and return the mean absolute error of the stored predictions.

        Args:
            y_true: Optional true labels; see :meth:`_true_labels` for the
                default.

        Returns:
            Mean of ``|y_true - y_pred_rf|``.
        """
        # Absolute (not squared) differences, matching the method's name.
        mae = np.abs(self._true_labels(y_true) - self.y_pred_rf).mean()
        print("Mean Absoluter error of Random Forest     :", mae)
        return mae

    def variance_bias(self, y_true=None):
        """Print and return the script's variance and "bias" figures.

        ``variance`` is ``np.var`` of the predictions. ``bias`` is the mean
        squared deviation of the true labels from the mean prediction, minus
        that variance. NOTE(review): this is the script's own definition, not
        a textbook bias-variance decomposition -- confirm it is what is wanted.

        Args:
            y_true: Optional true labels; see :meth:`_true_labels` for the
                default.

        Returns:
            Tuple ``(variance, bias)``.
        """
        truth = self._true_labels(y_true)
        variance = np.var(self.y_pred_rf)
        print("Variance of RandomForest model is         :", variance)
        spread_about_mean = np.mean((np.mean(self.y_pred_rf) - truth) ** 2)
        bias = spread_about_mean - variance
        print("Bias of RandomForest model is             :", bias)
        return variance, bias

# Driver: train each model on the shared split and report its metrics.
# Every method below prints its own report, so the calls are not wrapped in
# print() -- doing so only added a stray extra line after each metric.
print("-------LUNG CANCER PREDICTION USING LOGISTIC REGRESSION--------")
logistic = logistic_regression()
logistic.logistic(X_train, y_train)      # fit on the training split and report accuracy
logistic.mean_absolute_error()           # report mean absolute error
logistic.variance_bias()                 # report variance and bias
print("-------LUNG CANCER PREDICTION USING GRADIENT BOOSTING CLASSIFIER--------")
gbc = gradient_boosting()
gbc.gb(X_train, y_train)                 # fit on the training split and report accuracy
gbc.mean_absolute_error()                # report mean absolute error
gbc.variance_bias()                      # report variance and bias
print("-------LUNG CANCER PREDICTION USING RANDOM FOREST CLASSIFIER--------")
rf_classifier = random_forest_classifier()
rf_classifier.random_forest(X_train, y_train)  # fit on the training split and report MSE and accuracy
rf_classifier.mean_absolute_error()            # report mean absolute error
rf_classifier.variance_bias()                  # report variance and bias