--- a
+++ b/term_project_code_ec346.py
@@ -0,0 +1,399 @@
+# -*- coding: utf-8 -*-
+"""TERM PROJECT CODE - EC346
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/drive/19hzvrRak0RDFpr7coKVyzAWFMKqe9DLQ
+
+### **Imported libraries**
+"""
+
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.svm import SVC
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.metrics import accuracy_score, recall_score, confusion_matrix, precision_score, f1_score, precision_recall_curve, roc_curve, roc_auc_score, auc
+from keras.models import Sequential
+from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
+from keras.utils import to_categorical
+from skimage import io, color, transform
+from keras.preprocessing import image
+from keras.preprocessing.image import ImageDataGenerator
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+"""### ***Base Model 1*** : *K-Nearest Neighbours (KNN)*"""
+
+image_folder = '/content/drive/MyDrive/DatasetForUseKNN'
+
+# Function to load and preprocess images
+def load_and_preprocess_images(folder_path):
+    images = []
+    labels = []
+
+    for label in ['Bleeding', 'NonBleeding']:                                   # Labeling 2 classes as Bleeding/NonBleeding using CamelCase
+        for i in range(1, 1310):                                                # There are 1309 images per class
+            image_path = f"{folder_path}/Images_{label}/img- ({i}).png"
+            img = io.imread(image_path)
+            img_gray = color.rgb2gray(img)
+            img_resized = transform.resize(img_gray, (8, 8), anti_aliasing=True)
+
+            images.append(img_resized.flatten())
+            labels.append(label)
+
+    return np.array(images), np.array(labels)
+
+# Load and preprocess your image data
+X_custom, y_custom = load_and_preprocess_images(image_folder)
+
+# Split the data into training and testing sets
+X_train, X_test, y_train, y_test = train_test_split(X_custom, y_custom, test_size=0.2, random_state=42)
+
+# Initialize the KNN classifier
+knn_classifier = KNeighborsClassifier(n_neighbors=3)
+
+# Train the classifier
+knn_classifier.fit(X_train, y_train)
+
+# Make predictions on the test set
+y_pred = knn_classifier.predict(X_test)
+
+# Evaluate the KNN performance
+KNN_accuracy = accuracy_score(y_test, y_pred)
+print(f"KNN Accuracy: {KNN_accuracy:.2f}")
+
+# Example: Predict a new image
+# A separate .png file has been uploaded in the session storage in Google Colab from the dataset for prediction
+new_image_path = '/content/img- (1113).png'
+new_img = io.imread(new_image_path)
+new_img_gray = color.rgb2gray(new_img)
+new_img_resized = transform.resize(new_img_gray, (8, 8), anti_aliasing=True)
+new_image_flatten = new_img_resized.flatten().reshape(1, -1)
+
+# Make a prediction on the new image
+prediction = knn_classifier.predict(new_image_flatten)
+print(f"Predicted class: {prediction[0]}")
+
+# Visualize the new image
+plt.figure(figsize=(4, 4))
+plt.imshow(new_img_resized, cmap=plt.cm.gray_r, interpolation='nearest')
+plt.title(f"Predicted Class: {prediction[0]}")
+plt.show()
+
+"""### ***Base Model 2*** : *Support Vector Machine (SVM)*"""
+
+image_folder = '/content/drive/MyDrive/DatasetForUseKNN'
+
+# Function to load and preprocess images
+def load_and_preprocess_images(folder_path):
+    images = []
+    labels = []
+
+    for label in ['Bleeding', 'NonBleeding']:                                    # Labeling 2 classes as Bleeding/NonBleeding using CamelCase
+        for i in range(1, 1310):                                                 # There are 1309 images per class
+            image_path = f"{folder_path}/Images_{label}/img- ({i}).png"
+            img = io.imread(image_path)
+            img_gray = color.rgb2gray(img)
+            img_resized = transform.resize(img_gray, (8, 8), anti_aliasing=True)
+
+            images.append(img_resized.flatten())
+            labels.append(label)
+
+    return np.array(images), np.array(labels)
+
+# Load and preprocess your image data
+X_custom, y_custom = load_and_preprocess_images(image_folder)
+
+# Split the data into training and testing sets
+X_train, X_test, y_train, y_test = train_test_split(X_custom, y_custom, test_size=0.2, random_state=42)
+
+# Flatten the image data for SVM
+X_train_flatten = X_train.reshape(X_train.shape[0], -1)
+X_test_flatten = X_test.reshape(X_test.shape[0], -1)
+
+# Initialize the SVM classifier
+svm_classifier = SVC(kernel='rbf', C=10.0, gamma='scale')  # You can experiment with different kernels and C values
+
+# Train the classifier
+svm_classifier.fit(X_train, y_train)
+
+# Make predictions on the test set
+y_pred = svm_classifier.predict(X_test)
+
+# Calculate accuracy
+accuracy = accuracy_score(y_test, y_pred)
+print(f" SVM Accuracy: {accuracy:.2f}")
+
+# Example: Predict a new image
+# A separate .png file has been uploaded in the session storage in Google Colab from the dataset for prediction
+new_image_path = '/content/img- (1113).png'
+new_img = io.imread(new_image_path)
+new_img_gray = color.rgb2gray(new_img)
+new_img_resized = transform.resize(new_img_gray, (8, 8), anti_aliasing=True)
+new_image_flatten = new_img_resized.flatten().reshape(1, -1)
+
+# Make a prediction on the new image
+prediction = svm_classifier.predict(new_image_flatten)
+print(f"Predicted class: {prediction[0]}")
+
+# Visualize the new image
+plt.figure(figsize=(4, 4))
+plt.imshow(new_img_resized, cmap=plt.cm.gray_r, interpolation='nearest')
+plt.title(f"Predicted class: {prediction[0]}")
+plt.show()
+
+"""### *Base Model 3* : *Convolutional Neural Network (CNN)*"""
+
+image_folder = '/content/drive/MyDrive/DatasetForUseKNN'
+
+# Function to load and preprocess images
+def load_and_preprocess_images(folder_path):
+    images = []
+    labels = []
+
+    for label in range(2):                                                      # Assuming you have 2 classes (bleeding and non-bleeding)
+        for i in range(1, 1310):                                                # Assuming you have 1309 images per class
+            if label == 0:
+              wlabel = "Bleeding"
+            else:
+              wlabel = "NonBleeding"
+            image_path = f"{folder_path}/Images_{wlabel}/img- ({i}).png"
+            img = image.load_img(image_path, target_size=(64, 64))              # Resize images to a consistent size
+            img_array = image.img_to_array(img)
+            images.append(img_array)
+            labels.append(label)
+
+    return np.array(images), np.array(labels)
+
+# Load and preprocess your image data
+X, y = load_and_preprocess_images(image_folder)
+
+# FEATURE EXTRACTION for CNN
+# Convert labels to categorical (one-hot encoding)
+y_categorical = to_categorical(y, num_classes=2)
+
+# Split the data into training and testing sets
+X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42)
+
+# Build the CNN model
+model = Sequential()
+model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)))
+model.add(MaxPooling2D(pool_size=(2, 2)))
+model.add(Conv2D(64, (3, 3), activation='relu'))
+model.add(MaxPooling2D(pool_size=(2, 2)))
+model.add(Flatten())
+model.add(Dense(128, activation='relu'))
+model.add(Dense(2, activation='softmax'))
+
+# Compile the model
+model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
+
+# Data augmentation to generate more training samples
+datagen = ImageDataGenerator(rotation_range=20, width_shift_range=0.2, height_shift_range=0.2, shear_range=0.2, zoom_range=0.2, horizontal_flip=True)
+datagen.fit(X_train)
+
+# Train the model
+model.fit_generator(datagen.flow(X_train, y_train, batch_size=32), epochs=10, validation_data=(X_test, y_test))
+
+# Evaluate the model on the test set
+y_pred = np.argmax(model.predict(X_test), axis=1)
+y_true = np.argmax(y_test, axis=1)
+accuracy = accuracy_score(y_true, y_pred)
+print(f"Accuracy: {accuracy:.2f}")
+
+# Example: Predict a new image
+# A separate .png file has been uploaded in the session storage in Google Colab from the dataset for prediction
+new_image_path = '/content/img- (1113).png'
+new_img = image.load_img(new_image_path, target_size=(64, 64))
+new_img_array = image.img_to_array(new_img)
+new_img_array = np.expand_dims(new_img_array, axis=0)
+new_img_array /= 255.0  # Normalize pixel values
+prediction = model.predict(new_img_array)
+predicted_class = np.argmax(prediction[0])
+print(type(predicted_class))
+if predicted_class == 0:
+  predicted_class = 'Bleeding'
+if predicted_class == 1:
+  predicted_class = 'Non-Bleeding'
+
+# Visualize the new image
+plt.imshow(new_img)
+plt.title(f"Predicted class: {predicted_class}")
+plt.show()
+
+"""### ***Base Model 4*** : *Decision Trees (DT)*"""
+
+image_folder = '/content/drive/MyDrive/DatasetForUseKNN'
+
+# Function to load and preprocess images
+def load_and_preprocess_images(folder_path):
+    images = []
+    labels = []
+
+    for label in ['Bleeding', 'NonBleeding']:                                   # Labeling 2 classes as Bleeding/NonBleeding using CamelCase
+        for i in range(1, 1310):                                                # There are 1309 images per class
+            image_path = f"{folder_path}/Images_{label}/img- ({i}).png"
+            img = io.imread(image_path)
+            img_gray = color.rgb2gray(img)
+            img_resized = transform.resize(img_gray, (8, 8), anti_aliasing=True)
+
+            images.append(img_resized.flatten())
+            labels.append(label)
+
+    return np.array(images), np.array(labels)
+
+# Load and preprocess your image data
+X_custom, y_custom = load_and_preprocess_images(image_folder)
+
+# Split the data into training and testing sets
+X_train, X_test, y_train, y_test = train_test_split(X_custom, y_custom, test_size=0.2, random_state=42)
+
+# Initialize the Decision Tree classifier
+dt_classifier = DecisionTreeClassifier(random_state=42)
+
+# Train the classifier
+dt_classifier.fit(X_train, y_train)
+
+# Make predictions on the test set
+y_pred = dt_classifier.predict(X_test)
+
+# Calculate accuracy
+accuracy = accuracy_score(y_test, y_pred)
+print(f"Accuracy: {accuracy:.2f}")
+
+# Example: Predict a new image
+# A separate .png file has been uploaded in the session storage in Google Colab from the dataset for prediction
+new_image_path = '/content/img- (1113).png'
+new_img = io.imread(new_image_path)
+new_img_gray = color.rgb2gray(new_img)
+new_img_resized = transform.resize(new_img_gray, (8, 8), anti_aliasing=True)
+new_image_flatten = new_img_resized.flatten().reshape(1, -1)
+
+# Make a prediction on the new image
+prediction = dt_classifier.predict(new_image_flatten)
+print(f"Predicted class: {prediction[0]}")
+
+# Visualize the new image
+plt.figure(figsize=(4, 4))
+plt.imshow(new_img_resized, cmap=plt.cm.gray_r, interpolation='nearest')
+plt.title(f"Predicted class: {prediction[0]}")
+plt.show()
+
+image_folder = '/content/drive/MyDrive/DatasetForUseKNN'
+
+# Function to load and preprocess images
+def load_and_preprocess_images(folder_path):
+    images = []
+    labels = []
+
+    for label in range(2):
+      if label == 0:
+        wlabel = "Bleeding"
+      else:
+        wlabel = 'NonBleeding'                                                  # Assuming you have 2 classes (bleeding and non-bleeding)
+      for i in range(1, 1310):
+            image_path = f"{folder_path}/Images_{wlabel}/img- ({i}).png"
+            img = io.imread(image_path)
+            img_gray = color.rgb2gray(img)
+            img_resized = transform.resize(img_gray, (8, 8), anti_aliasing=True)
+
+            images.append(img_resized.flatten())
+            labels.append(label)
+
+    return np.array(images), np.array(labels)
+
+# Load and preprocess your image data
+X_custom, y_custom = load_and_preprocess_images(image_folder)
+
+# Split the data into training and testing sets
+X_train, X_test, y_train, y_test = train_test_split(X_custom, y_custom, test_size=0.2, random_state=42)
+
+# Convert labels to categorical (one-hot encoding)
+y_train_categorical = to_categorical(y_train, num_classes=2)
+y_test_categorical = to_categorical(y_test, num_classes=2)
+
+# Train KNN model
+knn_model = KNeighborsClassifier(n_neighbors=3)
+knn_model.fit(X_train, y_train)
+
+# Train SVM model
+svm_model = SVC(kernel='rbf', C=1.0, gamma='scale')
+svm_model.fit(X_train, y_train)
+
+# Train CNN model
+cnn_model = Sequential()
+cnn_model.add(Flatten(input_shape=(8, 8)))
+cnn_model.add(Dense(128, activation='relu'))
+cnn_model.add(Dense(2, activation='softmax'))
+cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
+cnn_X_train = X_train.reshape(-1, 8, 8)
+cnn_X_test = X_test.reshape(-1, 8, 8)
+cnn_model.fit(cnn_X_train, y_train_categorical, epochs=10, batch_size=32)
+
+# Train Decision Tree model
+dt_model = DecisionTreeClassifier(random_state=42)
+dt_model.fit(X_train, y_train)
+
+# Make predictions using each model
+knn_predictions = knn_model.predict(X_test)
+svm_predictions = svm_model.predict(X_test)
+cnn_predictions = np.argmax(cnn_model.predict(cnn_X_test), axis=1)
+dt_predictions = dt_model.predict(X_test)
+
+# Combine predictions using simple averaging
+ensemble_predictions = np.round((knn_predictions + svm_predictions + cnn_predictions + dt_predictions) / 4)
+
+# Evaluate the ensemble performance
+# Accuracy
+ensemble_accuracy = accuracy_score(y_test, ensemble_predictions)
+print(f"Ensemble Accuracy: {ensemble_accuracy:.2f}")
+
+# Precision
+ensemble_precision = precision_score(y_test, ensemble_predictions)
+print(f"Ensemble Precision: {ensemble_precision:.2f}")
+
+# Recall
+ensemble_recall = recall_score(y_test, ensemble_predictions)
+print(f"Ensemble Recall: {ensemble_recall:.2f}")
+
+# F1 Score
+ensemble_f1_score = f1_score(y_test, ensemble_predictions)
+print(f"Ensemble F1 Score: {ensemble_f1_score:.2f}")
+
+# Confusion Matrix
+ensemble_ConfusionMatrix = confusion_matrix(y_test, ensemble_predictions)
+fig, ax = plt.subplots(figsize=(6,6))         # Sample figsize in inches
+sns.set(font_scale=1.6)
+sns.heatmap(ensemble_ConfusionMatrix, annot=True, ax=ax)
+print(f"Ensemble Confusion Matrix:\n{ensemble_ConfusionMatrix}")
+
+# PLOTTING GRAPHS
+
+# ROC Curve
+plt.figure(figsize=(8, 6))
+
+fpr, tpr, _ = roc_curve(y_test, ensemble_predictions)
+roc_auc = auc(fpr, tpr)
+plt.plot(fpr, tpr, label=f'Ensemble (AUC = {roc_auc:.2f})', linestyle='--')
+
+plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
+plt.xlabel('False Positive Rate')
+plt.ylabel('True Positive Rate')
+plt.title('Receiver Operating Characteristic (ROC) Curve')
+plt.legend(loc='lower right')
+plt.show()
+
+# Precision-Recall Curve
+plt.figure(figsize=(8, 6))
+
+precision, recall, _ = precision_recall_curve(y_test, ensemble_predictions)
+pr_auc = auc(recall, precision)
+plt.plot(recall, precision, label=f'Ensemble (AUC = {pr_auc:.2f})', linestyle='--')
+
+plt.xlabel('Recall')
+plt.ylabel('Precision')
+plt.title('Precision-Recall Curve')
+plt.legend(loc='lower left')
+plt.show()
\ No newline at end of file