--- a +++ b/Lung_cancer.py @@ -0,0 +1,104 @@ +import numpy as np # importing numpy for numerical calculations + +import pandas as pd # importing pandas for creating data frames +from sklearn.model_selection import train_test_split # train_tets_split for spliting the data into training an testing + from sklearn.linear_model import LogisticRegression # for logistic resgression +from sklearn.ensemble import RandomForestClassifier # for random forest classifierfrom sklearn.ensemble import GradientBoostingClassifier # For gradienboosting classifier +from sklearn.metrics import accuracy_score # importing metrics for measuring accuracy +from sklearn.metrics import mean_squared_error # for calculating mean squre errors + +df = pd.read_csv("lung_cancer_examples.csv") # reading csv data +print(df.sample) + +X = df.drop[['Name', 'Surname', 'Result']] +y = df.iloc[:, -1] + +from sklearn.model_selection import train_test_split +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=10) + +class logistic_regression: # creating a logistic regression class + def logistic(self, X_train, y_train): # creating a function that will create a model and after training it will give accuracy + # Here we are creating a decision tree model using LogisticRegression. to do that we have to fit the training data (X_train, y_rain) into model_lr object + from sklearn.linear_model import LogisticRegression + model_lr = LogisticRegression() # creating a logistic model + model_lr.fit(X_train, y_train) + # here we are predicting the Logistic Regression model on X_test [testing data] + self.y_pred_lr = model_lr.predict(X_test) + #print("Mean square error for logistic regression model: ", mean_squared_error(y_test, y_pred_lr)) # will give mean square error of the model + # accuracy_score will take y_test(actual value) and y_pred_lr(predicted value) and it will give the accuracy of the model + print("Logistic Regression model Accuracy :", accuracy_score(y_test, self.y_pred_lr)*100, "%") + def mean_absolute_error(self): + print("Mean Absoluter error of logistic Regression :", np.square(y_test - self.y_pred_lr).mean()) # calculating mean absolute error of LogisticRegression model + + def variance_bias(self): + Variance = np.var(self.y_pred_lr) # calculating variance in the predicted output + print("Variance of LogisticRegression model is :", Variance) + SSE = np.mean((np.mean(self.y_pred_lr) - y_test)** 2) # calculating s=sum of square error + Bias = SSE - Variance # calculating Bias taking a difference between SSE and Variance + print("Bias of LogisticRegression model is :", Bias) + +class gradient_boosting: # creating a logistic regression class + def gb(self, X_train, y_train): # creating a function that will create a model and after training it will give accuracy + # Here we are creating a decision tree model using LogisticRegression. to do that we have to fit the training data (X_train, y_rain) into model_lr object + from sklearn.ensemble import GradientBoostingClassifier + model_gbc = GradientBoostingClassifier() # creating a logistic model + model_gbc.fit(X_train, y_train) + # here we are predicting the Logistic Regression model on X_test [testing data] + self.y_pred_gbc = model_gbc.predict(X_test) + #print("Mean square error for logistic regression model: ", mean_squared_error(y_test, y_pred_lr)) # will give mean square error of the model + # accuracy_score will take y_test(actual value) and y_pred_lr(predicted value) and it will give the accuracy of the model + print("Logistic Regression model Accuracy :", accuracy_score(y_test, self.y_pred_gbc)*100, "%") + def mean_absolute_error(self): + print("Mean Absoluter error of logistic Regression :", np.square(y_test - self.y_pred_gbc).mean()) # calculating mean absolute error of LogisticRegression model + + def variance_bias(self): + Variance = np.var(self.y_pred_gbc) # calculating variance in the predicted output + print("Variance of LogisticRegression model is :", Variance) + SSE = np.mean((np.mean(self.y_pred_gbc) - y_test)** 2) # calculating s=sum of square error + Bias = SSE - Variance # calculating Bias taking a difference between SSE and Variance + print("Bias of LogisticRegression model is :", Bias) + +class random_forest_classifier: + def random_forest(self, X_train, y_train): + # Here we are creating a decision tree model using RandomForestClassifier. to do that we have to fit the training data (X_train, y_rain) into model_rc object + from sklearn.ensemble import RandomForestClassifier + self.model_rf = RandomForestClassifier() + self.model_rf.fit(X_train, y_train) + # here we are predicting the Random Forest Classifier model on X_test [testing data] + self.y_pred_rf = self.model_rf.predict(X_test) + print("Mean square error for random forest model: ", mean_squared_error(y_test, self.y_pred_rf)) # will give mean square error of the model + # accuracy_score will take y_test(actual value) and y_pred_rc(predicted value) and it will give the accuracy of the model + print("Random Forest model accuracy :", accuracy_score(y_test, self.y_pred_rf)*100, "%") + + def mean_absolute_error(self): + print("Mean Absoluter error of Random Forest :", np.square(y_test - self.y_pred_rf).mean()) # calculating mean absolute error of RandomForest model + def variance_bias(self): + Variance = np.var(self.y_pred_rf) # calculating variance in the predicted output + print("Variance of RandomForest model is :", Variance) + SSE = np.mean((np.mean(self.y_pred_rf) - y_test)** 2) # calculating s=sum of square error + Bias = SSE - Variance # calculating Bias taking a difference between SSE and Variance + print("Bias of RandomForest model is :", Bias) + +print("-------LUNG CANCER PREDICTION USING LOGISTIC REGRESSION--------") +# calling the class logistic_regression and creating object. +logistic = logistic_regression() +# calling logistic function that accepts two parameters i.e X_train, y_train +print(logistic.logistic(X_train, y_train)) +# getting accuracy of logistic regression model +print(logistic.mean_absolute_error()) # getting mean absolute error +print(logistic.variance_bias()) # getting variance and bias +print("-------LUNG CANCER PREDICTION USING GRADIENT BOOSTING CLASSIFIER--------") +# calling the class gradient_boosting and creating object. +gbc = gradient_boosting() +# calling gb function that accepts two parameters i.e X_train, y_train +print(gbc.gb(X_train, y_train)) +# getting accuracy of GradientBoostingClassifier model +print(gbc.mean_absolute_error()) # getting mean absolute error +print(gbc.variance_bias()) # getting variance and bias +print("-------LUNG CANCER PREDICTION USING RANDOM FOREST CLASSIFIER--------") +# calling the class random_forest_classifier and creating object. +rf_classifier = random_forest_classifier() +# calling random_forest function that accepts two parameters i.e X_train, y_train +print(rf_classifier.random_forest(X_train, y_train)) # getting accuracy of of random forest model +print(rf_classifier.mean_absolute_error()) # getting mean absolute error +print(rf_classifier.variance_bias()) # getting variance and bias