"""Train and evaluate the LuekemiaNet ALL (Acute Lymphoblastic Leukemia) classifier.

Flat training script: seeds all RNGs, loads the labelled cell-image dataset,
performs a stratified train/validation split, trains the model with Adam +
StepLR, then reports metrics (classification report, confusion matrix) and
runs Captum-style interpretability on two sample images.
"""
import os
import random

import numpy as np
import pandas as pd
import torch
import torch.optim as optim
import tqdm
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from torch import nn
from torch.optim.adam import Adam
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

from Classes.Data import LeukemiaDataset, augmentation
from Classes.Helpers import Helpers
from Classes.interpretability import interpret_model
from Classes.Model_2020 import LuekemiaNet, train_model
from Classes.model_api import (confusion_matrix2, get_predictions,
                               plot_training_history)

SEED = 323


def seed_everything(seed=SEED):
    """Seed every RNG in play (Python, NumPy, PyTorch CPU/CUDA) for reproducibility.

    Also forces deterministic cuDNN kernels, trading some speed for
    run-to-run stability.
    """
    random.seed(seed)
    # BUGFIX: was 'PYHTONHASHSEED' (typo), so Python hash seeding never applied.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True


# Sample images used for the interpretability passes at the end of the run.
image_path = '/home/allen/Drive C/Peter Moss AML Leukemia Research/ALL-PyTorch-2020/Classifier/Model/Data/Test/Im047_0.jpg'
image_path2 = '/home/allen/Drive C/Peter Moss AML Leukemia Research/ALL-PyTorch-2020/Classifier/Model/Data/Test/Im006_1.jpg'
label_idx = '/home/allen/Drive C/Peter Moss AML Leukemia Research/Dataset/pred_label.json'

seed_everything(SEED)
# helper class (also exposes the classifier configuration)
helper = Helpers("Test Model", False)
# train data directory
train_dir = '/home/allen/Drive C/Peter Moss AML Leukemia Research/Dataset/all_train/'
# train label directory (CSV of image-name -> label rows)
train_csv = '/home/allen/Drive C/Peter Moss AML Leukemia Research/Dataset/train.csv'
# class display names; index order must match the integer labels in the CSV
class_name = ["zero", "one"]

# -- hyper-parameters pulled from the project configuration ------------------
# training batch size
batch_size = helper.config["classifier"]["train"]["batch"]
# accuracy/loss plot save path
acc_loss_png = helper.config["classifier"]["model_params"]["plot_loss_and_acc"]
# confusion-matrix plot save path
confusion_png = helper.config["classifier"]["model_params"]["confusion_matrix"]
# number of epochs
epochs = helper.config["classifier"]["train"]["epochs"]
# learning rate for Adam
learn_rate = helper.config["classifier"]["train"]["learning_rate_adam"]
# weight decay for Adam
decay = helper.config["classifier"]["train"]["decay_adam"]

# read train CSV file and report the label distribution
labels = pd.read_csv(train_csv)
labels_count = labels.label.value_counts()
print(labels_count)
print(labels.head())

# stratified 80/20 train/validation split so both sets keep the class balance
train, valid = train_test_split(labels, stratify=labels.label, test_size=0.2, shuffle=True)
print(len(train), len(valid))

# data augmentation pipelines (train gets augmentation, validation does not)
training_transforms, validation_transforms = augmentation()

# Read Acute Lymphoblastic Leukemia dataset from disk
trainset = LeukemiaDataset(df_data=train, data_dir=train_dir, transform=training_transforms)
validset = LeukemiaDataset(df_data=valid, data_dir=train_dir, transform=validation_transforms)

train_size, valid_size = len(trainset), len(validset)
print(train_size, valid_size)

# NOTE(review): the original script built SubsetRandomSampler objects from the
# pre-split DataFrame indices here but never passed them to the DataLoaders
# (which use shuffle= instead). They were dead code — and the DataFrame indices
# would not line up with the re-indexed datasets anyway — so they are removed.

# Prepare dataset loaders: shuffle training batches, keep validation order fixed
train_data_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
valid_data_loader = DataLoader(validset, batch_size=batch_size, shuffle=False)

# Sanity-check the tensor shapes of one training batch
print('Training Set:\n')
for images, labels in train_data_loader:
    print('Image batch dimensions:', images.size())
    print('Image label dimensions:', labels.size())
    break
print("\n")

# Echo every validation batch's labels (no break: iterates the full loader)
for images, labels in valid_data_loader:
    print("The labels: ", labels)

# Define model
model = LuekemiaNet()
# check if CUDA is available, else use CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Move our model to available hardware
model.to(device)

# Cross entropy loss function
criterion = nn.CrossEntropyLoss()
# Adam optimizer with configured learning rate and weight decay
# (original comment wrongly said "stochastic gradient descent")
optimizer = Adam(params=model.parameters(), lr=learn_rate, weight_decay=decay)
# scheduler = CyclicLR(optimizer, base_lr=lr, max_lr=0.01, step_size=5, mode='triangular2')
# Step the learning rate down by `gamma` every 7 epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7,
                                      gamma=helper.config["classifier"]["model_params"]["gamma"])

# begin training
real_model, history = train_model(model, train_data_loader,
                                  valid_data_loader,
                                  optimizer, scheduler,
                                  criterion, train_size,
                                  valid_size,
                                  device=device, n_epochs=epochs)
# plot model loss and accuracy
plot_training_history(history, save_path=acc_loss_png)
# Get model prediction
y_pred, y_test = get_predictions(real_model, valid_data_loader, device)
# Log per-class precision, recall and f1_score
helper.logger.info(classification_report(y_test, y_pred, target_names=class_name))
# Build and save the confusion matrix plot
cm = confusion_matrix(y_test, y_pred)
confusion_matrix2(cm, class_name, save_path=confusion_png)

# Interpretability: attribution maps for two sample images, two methods each
interpret_model(real_model, validation_transforms, image_path, label_idx, use_cpu=True,
                interpret_type="integrated gradients")
interpret_model(real_model, validation_transforms, image_path, label_idx, use_cpu=True, interpret_type="gradient shap")

interpret_model(real_model, validation_transforms, image_path2, label_idx, use_cpu=True,
                interpret_type="integrated gradients")
interpret_model(real_model, validation_transforms, image_path2, label_idx, use_cpu=True, interpret_type="gradient shap")