Diff of /Lung_cancer.py [000000] .. [fc2931]

Switch to side-by-side view

--- a
+++ b/Lung_cancer.py
@@ -0,0 +1,104 @@
# --- Imports and data preparation -------------------------------------------
import numpy as np    # numerical calculations (metrics below)
import pandas as pd   # data frames
# BUG FIX: this import had a leading space in the original (IndentationError).
from sklearn.linear_model import LogisticRegression      # logistic regression model
# BUG FIX: the original fused this import onto the end of a comment line,
# so GradientBoostingClassifier was never actually imported.
from sklearn.ensemble import RandomForestClassifier      # random forest model
from sklearn.ensemble import GradientBoostingClassifier  # gradient boosting model
from sklearn.model_selection import train_test_split     # train/test split
from sklearn.metrics import accuracy_score               # accuracy metric
from sklearn.metrics import mean_squared_error           # mean squared error metric

df = pd.read_csv("lung_cancer_examples.csv")  # read the dataset from CSV
# BUG FIX: df.sample (no parentheses) printed the bound method object,
# not any data; call it to show a random row.
print(df.sample())

# Features: drop the identifier columns and the label column.
# BUG FIX: the original used df.drop[...] (subscription on a method ->
# TypeError); drop must be *called*, with the columns to remove.
X = df.drop(columns=['Name', 'Surname', 'Result'])
y = df.iloc[:, -1]  # label is the last column ('Result')

# 80/20 train/test split with a fixed seed for reproducibility.
# (The duplicate re-import of train_test_split was removed.)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=10)
+
class logistic_regression:
    """Train and evaluate a LogisticRegression model.

    NOTE(review): like the original script, the metric methods read the
    module-level globals X_test and y_test rather than taking parameters.
    Call logistic() first; it stores predictions on the instance.
    """

    def logistic(self, X_train, y_train):
        """Fit LogisticRegression on (X_train, y_train), predict on the
        global X_test, and print the accuracy as a percentage."""
        from sklearn.linear_model import LogisticRegression
        model_lr = LogisticRegression()
        model_lr.fit(X_train, y_train)
        # Store predictions on the instance so the metric methods can reuse them.
        self.y_pred_lr = model_lr.predict(X_test)
        print("Logistic Regression model Accuracy               :",
              accuracy_score(y_test, self.y_pred_lr) * 100, "%")

    def mean_absolute_error(self):
        """Print the mean absolute error of the stored predictions.

        BUG FIX: the original computed np.square(...).mean(), which is the
        mean *squared* error, while the label claimed absolute error.
        """
        print("Mean Absolute error of logistic Regression      :",
              np.abs(y_test - self.y_pred_lr).mean())

    def variance_bias(self):
        """Print the variance of the predictions and a bias estimate
        (squared error of the mean prediction minus the variance)."""
        Variance = np.var(self.y_pred_lr)  # variance of the predicted outputs
        print("Variance of LogisticRegression model is         :", Variance)
        SSE = np.mean((np.mean(self.y_pred_lr) - y_test) ** 2)
        Bias = SSE - Variance  # bias = SSE - variance
        print("Bias of LogisticRegression model is             :", Bias)
+
class gradient_boosting:
    """Train and evaluate a GradientBoostingClassifier model.

    NOTE(review): like the original script, the metric methods read the
    module-level globals X_test and y_test rather than taking parameters.
    Call gb() first; it stores predictions on the instance.
    """

    def gb(self, X_train, y_train):
        """Fit GradientBoostingClassifier on (X_train, y_train), predict on
        the global X_test, and print the accuracy as a percentage."""
        from sklearn.ensemble import GradientBoostingClassifier
        model_gbc = GradientBoostingClassifier()
        model_gbc.fit(X_train, y_train)
        # Store predictions on the instance so the metric methods can reuse them.
        self.y_pred_gbc = model_gbc.predict(X_test)
        # BUG FIX: the printed labels said "Logistic Regression" although this
        # class evaluates a gradient boosting model.
        print("Gradient Boosting model Accuracy                 :",
              accuracy_score(y_test, self.y_pred_gbc) * 100, "%")

    def mean_absolute_error(self):
        """Print the mean absolute error of the stored predictions.

        BUG FIX: the original computed np.square(...).mean(), which is the
        mean *squared* error, while the label claimed absolute error.
        """
        print("Mean Absolute error of Gradient Boosting        :",
              np.abs(y_test - self.y_pred_gbc).mean())

    def variance_bias(self):
        """Print the variance of the predictions and a bias estimate
        (squared error of the mean prediction minus the variance)."""
        Variance = np.var(self.y_pred_gbc)  # variance of the predicted outputs
        print("Variance of GradientBoosting model is           :", Variance)
        SSE = np.mean((np.mean(self.y_pred_gbc) - y_test) ** 2)
        Bias = SSE - Variance  # bias = SSE - variance
        print("Bias of GradientBoosting model is               :", Bias)
+
class random_forest_classifier:
    """Train and evaluate a RandomForestClassifier model.

    NOTE(review): like the original script, the metric methods read the
    module-level globals X_test and y_test rather than taking parameters.
    Call random_forest() first; it stores the model and predictions.
    """

    def random_forest(self, X_train, y_train):
        """Fit RandomForestClassifier on (X_train, y_train), predict on the
        global X_test, and print the MSE and accuracy."""
        from sklearn.ensemble import RandomForestClassifier
        self.model_rf = RandomForestClassifier()
        self.model_rf.fit(X_train, y_train)
        # Store predictions on the instance so the metric methods can reuse them.
        self.y_pred_rf = self.model_rf.predict(X_test)
        print("Mean square error for random forest model: ",
              mean_squared_error(y_test, self.y_pred_rf))
        print("Random Forest model accuracy              :",
              accuracy_score(y_test, self.y_pred_rf) * 100, "%")

    def mean_absolute_error(self):
        """Print the mean absolute error of the stored predictions.

        BUG FIX: the original computed np.square(...).mean(), which is the
        mean *squared* error, while the label claimed absolute error.
        """
        print("Mean Absolute error of Random Forest      :",
              np.abs(y_test - self.y_pred_rf).mean())

    def variance_bias(self):
        """Print the variance of the predictions and a bias estimate
        (squared error of the mean prediction minus the variance)."""
        Variance = np.var(self.y_pred_rf)  # variance of the predicted outputs
        print("Variance of RandomForest model is         :", Variance)
        SSE = np.mean((np.mean(self.y_pred_rf) - y_test) ** 2)
        Bias = SSE - Variance  # bias = SSE - variance
        print("Bias of RandomForest model is             :", Bias)
+
# --- Driver: train and report each model -------------------------------------
# BUG FIX: every evaluation method prints its own results and returns None,
# so the original's print(obj.method(...)) wrappers emitted a spurious "None"
# line after each report. The methods are now called directly.

print("-------LUNG CANCER PREDICTION USING LOGISTIC REGRESSION--------")
logistic = logistic_regression()
logistic.logistic(X_train, y_train)   # train, predict, print accuracy
logistic.mean_absolute_error()        # print mean absolute error
logistic.variance_bias()              # print variance and bias

print("-------LUNG CANCER PREDICTION USING GRADIENT BOOSTING CLASSIFIER--------")
gbc = gradient_boosting()
gbc.gb(X_train, y_train)              # train, predict, print accuracy
gbc.mean_absolute_error()             # print mean absolute error
gbc.variance_bias()                   # print variance and bias

print("-------LUNG CANCER PREDICTION USING RANDOM FOREST CLASSIFIER--------")
rf_classifier = random_forest_classifier()
rf_classifier.random_forest(X_train, y_train)  # train, predict, print MSE + accuracy
rf_classifier.mean_absolute_error()            # print mean absolute error
rf_classifier.variance_bias()                  # print variance and bias