In [1]:
from Classes.Data import augmentation, LeukemiaDataset, augmentation_alb
from Classes.Helpers import Helpers
from Classes.Model_2020 import LuekemiaNet, learner
from Classes.model_api import plot_training_history, show_predictions,\
    confusion_matrix2, get_predictions, predict_probability, show_prediction_confidence
from Classes.interpretability import interpret_model
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import torch.optim as optim
import pandas as pd
from torch import nn
import random
import torch
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np

In [2]:
SEED = 323


def seed_everything(seed=SEED):
    """Seed every random number generator used by this notebook.

    Args:
        seed: Integer seed applied to Python's ``random`` module, NumPy and
            PyTorch (CPU and CUDA generators). Defaults to ``SEED``.

    Returns:
        None. The function only mutates global RNG / backend state.
    """
    random.seed(seed)
    # The variable must be spelled exactly PYTHONHASHSEED to be honoured.
    # Hash randomisation is fixed at interpreter start-up, so setting it here
    # only influences Python subprocesses started after this call.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Also seed every CUDA device, not just the current one.
    torch.cuda.manual_seed_all(seed)
    # Ask cuDNN for deterministic kernels and disable the auto-tuner, which
    # may otherwise select different algorithms between runs.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


# Seed all random number generators before anything stochastic runs.
seed_everything(seed=SEED)

# Project helper object; later cells read `helper.config` and `helper.logger`.
helper = Helpers("Test Model", False)

# Root folder of the dataset on the local machine.
dataset_root = 'C:/Peter Moss AML Leukemia Research/Dataset/'
# Training data directory.
train_dir = dataset_root + 'all_train/'
# Training label file (CSV).
train_csv = dataset_root + 'train.csv'

# Class names, used as `target_names` for the classification report.
class_name = ["neg", "pos"]

In [3]:
# Raw string literals keep the Windows backslashes literal. The previous
# non-raw literals relied on invalid escape sequences (e.g. "\P", "\A", "\c")
# being passed through unchanged, which newer Python versions warn about.
# Two sample test images handed to interpret_model further down.
image_path = r'C:\Peter Moss AML Leukemia Research\ALL-PyTorch-2020\Classifier\Model\Data\Test\Im047_0.jpg'
image_path2 = r'C:\Peter Moss AML Leukemia Research\ALL-PyTorch-2020\Classifier\Model\Data\Test\Im006_1.jpg'
# JSON file passed to interpret_model as its label-index argument.
label_idx = r'C:\Peter Moss AML Leukemia Research\ALL-PyTorch-2020\Classifier\Model\class_idx.json'

In [4]:
# Shortcuts into the two configuration sections read in this cell.
train_cfg = helper.config["classifier"]["train"]
model_params_cfg = helper.config["classifier"]["model_params"]

# training batch size
batch_size = train_cfg["batch"]
# number of epochs
epochs = train_cfg["epochs"]
# learning rate (Adam)
learn_rate = train_cfg["learning_rate_adam"]
# weight decay (Adam)
decay = train_cfg["decay_adam"]

# accuracy and loss plot save path
acc_loss_png = model_params_cfg["plot_loss_and_acc"]
# confusion matrix plot save path
confusion_png = model_params_cfg["confusion_matrix"]
# read train CSV file
labels = pd.read_csv(train_csv)
# print label count
labels_count = labels.label.value_counts()
print(labels_count)
# print the first 5 rows of the label table
print(labels.head())
# Split the data into training and validation sets, stratified on the label.
# random_state pins the split to SEED so that re-running this cell (or running
# cells out of order) always yields the same partition, instead of depending on
# whatever state the global NumPy RNG happens to be in.
train, valid = train_test_split(
    labels,
    stratify=labels.label,
    test_size=0.1,
    shuffle=True,
    random_state=SEED,
)
print(len(train),len(valid))

0    49
1    39
Name: label, dtype: int64
      data  label
0  Im001_1      1
1  Im002_1      1
2  Im003_1      1
3  Im004_1      1
4  Im005_1      1
79 9


In [5]:
# Data augmentation: one transform pipeline for training, one for validation
training_transforms, validation_transforms = augmentation()

# Read Acute Lymphoblastic Leukemia dataset from disk
trainset = LeukemiaDataset(df_data=train, data_dir=train_dir, transform=training_transforms)
validset = LeukemiaDataset(df_data=valid, data_dir=train_dir, transform=validation_transforms)

train_size, valid_size = len(trainset), len(validset)
print(train_size, valid_size)

# NOTE: the SubsetRandomSampler objects that used to be built here were never
# passed to a DataLoader, and they were constructed from the original DataFrame
# index labels, which are not valid positions inside the smaller split datasets.
# They have been dropped; shuffling is handled by the DataLoader itself.

# Prepare dataset for neural networks
train_data_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
valid_data_loader = DataLoader(validset, batch_size=batch_size, shuffle=False)


79 9


In [6]:
# Checking the dataset: show the tensor shapes of the first training batch.
# The loop variables are deliberately NOT called `labels`, because that name
# holds the label DataFrame loaded from the training CSV and must not be
# overwritten by a batch tensor.
print('Training Set:\n')
for batch_images, batch_labels in train_data_loader:
    print('Image batch dimensions:', batch_images.size())
    print('Image label dimensions:', batch_labels.size())
    break
print("\n")

# loop through our validation data and output the labels
for _, batch_labels in valid_data_loader:
    print("The labels: ", batch_labels)

Training Set:

Image batch dimensions: torch.Size([10, 3, 100, 100])
Image label dimensions: torch.Size([10])


The labels:  tensor([1, 1, 0, 0, 0, 0, 0, 1, 1])


In [7]:

# Define model
model = LuekemiaNet()
# check if CUDA is available
train_on_gpu = torch.cuda.is_available()

# Only move the model to the GPU when one is actually present; calling
# model.cuda() unconditionally raises on CPU-only machines and made the
# "training on CPU" branch below unreachable.
if train_on_gpu:
    model.cuda()
    print('CUDA is available and training on GPU ...')
else:
    print('CUDA is not available and training on CPU ...')


CUDA is available and training on GPU ...


In [8]:
# Cross entropy loss function
criterion = nn.CrossEntropyLoss()

# Adam optimizer; learning rate and weight decay come from the config values
# read earlier (`learn_rate`, `decay`).
optimizer = optim.Adam(
    model.parameters(),
    lr=learn_rate,
    weight_decay=decay,
)

# Step scheduler: multiplies the learning rate by `gamma` (from the config)
# every 7 scheduler steps.
lr_gamma = helper.config["classifier"]["model_params"]["gamma"]
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=lr_gamma)

In [9]:
# begin training
# Pass the detected `train_on_gpu` flag instead of a hard-coded True so the
# training call agrees with the CUDA availability check made when the model
# was created.
real_model, history = learner(model, train_data_loader,
                              valid_data_loader,
                              optimizer, scheduler,
                              criterion, train_size,
                              valid_size,
                              train_on_gpu=train_on_gpu, n_epochs=epochs)

2020-12-31 03:09:27,061 - Model - INFO - Epoch 1/10
2020-12-31 03:09:27,091 - Model - INFO - ----------
2020-12-31 03:09:38,622 - Model - INFO - Train loss: 0.5225205793976784 Accuracy: 0.7341772317886353
2020-12-31 03:09:39,573 - Model - INFO - Valid loss: 0.7080000042915344 Accuracy: 0.5555555820465088

2020-12-31 03:09:39,761 - Model - INFO - Epoch 2/10
2020-12-31 03:09:39,774 - Model - INFO - ----------
2020-12-31 03:09:47,664 - Model - INFO - Train loss: 0.3280406277626753 Accuracy: 0.8607594966888428
2020-12-31 03:09:48,575 - Model - INFO - Valid loss: 0.8643777370452881 Accuracy: 0.5555555820465088

2020-12-31 03:09:48,645 - Model - INFO - Epoch 3/10
2020-12-31 03:09:48,651 - Model - INFO - ----------


--- Logging error ---
Traceback (most recent call last):
  File "C:\Users\Allen\Anaconda3\lib\logging\handlers.py", line 69, in emit
    self.doRollover()
  File "C:\Users\Allen\Anaconda3\lib\logging\handlers.py", line 393, in doRollover
    self.rotate(self.baseFilename, dfn)
  File "C:\Users\Allen\Anaconda3\lib\logging\handlers.py", line 110, in rotate
    os.rename(source, dest)
PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Peter Moss AML Leukemia Research\\ALL-PyTorch-2020\\Classifier\\Logs\\all.log' -> 'C:\\Peter Moss AML Leukemia Research\\ALL-PyTorch-2020\\Classifier\\Logs\\all.log.2020-12-30_00'
Call stack:
  File "C:\Users\Allen\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\Allen\Anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\Allen\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    ap

KeyboardInterrupt: 

In [None]:
# plot model loss and accuracy
# `history` is the second value returned by learner(); `acc_loss_png` is the
# save path read from the config. Presumably the figure is written to that
# path -- confirm against plot_training_history.
plot_training_history(history, save_path=acc_loss_png)

In [None]:
# Collect the model's predictions and the matching true labels over the
# validation loader
y_pred, y_test = get_predictions(real_model, valid_data_loader)

# Build the precision / recall / f1-score report and send it to the project logger
report = classification_report(y_test, y_pred, target_names=class_name)
helper.logger.info(report)

In [None]:
# Compute the confusion matrix from the true and predicted validation labels
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
# Hand the matrix to the project helper together with the class names and the
# configured save path
confusion_matrix2(cm, class_name, save_path=confusion_png)

In [None]:
# Run the trained model on the first sample image. `image_path` (defined near
# the top of the notebook) holds exactly the path that used to be duplicated
# here as a second hard-coded, non-raw Windows string literal.
prediction = predict_probability(real_model, validation_transforms, image_path)

In [None]:
# Display the value returned by predict_probability for the sample image
prediction

In [None]:
# Pass the prediction and the class names to the project helper; presumably it
# plots the per-class confidence -- confirm against show_prediction_confidence.
show_prediction_confidence(prediction, class_name)

Model Interpretation

In [None]:
# Run every interpretation method on both sample images. The nested loop
# reproduces the order of the six previously copy-pasted calls: all methods for
# `image_path` first, then all methods for `image_path2`.
# NOTE: `intrepret_type` (sic) is the keyword spelling the calls have always
# used, so it is kept unchanged.
interpret_methods = ("integrated gradients", "gradient shap", "saliency")
for sample_path in (image_path, image_path2):
    for method in interpret_methods:
        interpret_model(real_model, validation_transforms, sample_path, label_idx,
                        use_cpu=True, intrepret_type=method)

In [None]:
# test data directory and the CSV file holding the test labels
test_dir = 'C:/Peter Moss AML Leukemia Research/Dataset/all_test/'
test_csv = 'C:/Peter Moss AML Leukemia Research/Dataset/test.csv'

# Load the test labels and wrap the test images in the same dataset class,
# using the validation transforms
label_csv = pd.read_csv(test_csv)
testset = LeukemiaDataset(
    df_data=label_csv,
    data_dir=test_dir,
    transform=validation_transforms,
)
test_data_loader = DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=True,
)

# Call the project helper with the trained model, the class names and a
# request for 6 images
show_predictions(
    model=real_model,
    class_names=class_name,
    test_data_loader=test_data_loader,
    n_images=6,
)