# Classifier/AllPytorch2020.py
1
import os
2
import random
3
4
import numpy as np
5
import pandas as pd
6
import torch
7
import torch.optim as optim
8
import tqdm
9
from sklearn.metrics import classification_report, confusion_matrix
10
from sklearn.model_selection import train_test_split
11
from torch import nn
12
from torch.optim.adam import Adam
13
from torch.utils.data import DataLoader
14
from torch.utils.data.sampler import SubsetRandomSampler
15
16
from Classes.Data import LeukemiaDataset, augmentation
17
from Classes.Helpers import Helpers
18
from Classes.interpretability import interpret_model
19
from Classes.Model_2020 import LuekemiaNet, train_model
20
from Classes.model_api import (confusion_matrix2, get_predictions,
21
                               plot_training_history)
22
SEED = 323


def seed_everything(seed=SEED):
    """Seed every RNG used by the pipeline for reproducible runs.

    Covers Python's `random` module, the interpreter hash seed, NumPy,
    and PyTorch (CPU and CUDA), and forces deterministic cuDNN kernels.

    Args:
        seed (int): seed value applied to all generators.
    """
    random.seed(seed)
    # Fix: original wrote 'PYHTONHASHSEED' (typo), which set a meaningless
    # env var and left the real hash seed untouched.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Safe no-op when no CUDA device is available.
    torch.cuda.manual_seed(seed)
    # Trade kernel-selection speed for run-to-run determinism.
    torch.backends.cudnn.deterministic = True
35
image_path = '/home/allen/Drive C/Peter Moss AML Leukemia Research/ALL-PyTorch-2020/Classifier/Model/Data/Test/Im047_0.jpg'
36
image_path2 = '/home/allen/Drive C/Peter Moss AML Leukemia Research/ALL-PyTorch-2020/Classifier/Model/Data/Test/Im006_1.jpg'
37
label_idx = '/home/allen/Drive C/Peter Moss AML Leukemia Research/Dataset/pred_label.json'
38
39
seed_everything(SEED)
40
# helper class
41
helper = Helpers("Test Model", False)
42
# train data directory
43
train_dir = '/home/allen/Drive C/Peter Moss AML Leukemia Research/Dataset/all_train/'
44
# train label directoy
45
train_csv = '/home/allen/Drive C/Peter Moss AML Leukemia Research/Dataset/train.csv'
46
# labels
47
class_name = ["zero", "one"]
48
49
# --- Hyper-parameters and output locations from the classifier config ---
# Training batch size.
batch_size = helper.config["classifier"]["train"]["batch"]
# Where the accuracy/loss training curves are saved.
acc_loss_png = helper.config["classifier"]["model_params"]["plot_loss_and_acc"]
# Where the confusion-matrix plot is saved.
confusion_png = helper.config["classifier"]["model_params"]["confusion_matrix"]
# Number of training epochs.
epochs = helper.config["classifier"]["train"]["epochs"]
# Adam learning rate.
learn_rate = helper.config["classifier"]["train"]["learning_rate_adam"]
# Adam weight decay.
decay = helper.config["classifier"]["train"]["decay_adam"]

# Load the training labels and report the class balance.
labels = pd.read_csv(train_csv)
labels_count = labels.label.value_counts()
print(labels_count)
print(labels.head())

# Stratified 80/20 train/validation split.
# Fix: pass random_state so the split is reproducible — without it,
# train_test_split draws from its own RNG and produces a different split
# on every run, defeating the seeding done by seed_everything().
train, valid = train_test_split(labels, stratify=labels.label, test_size=0.2,
                                shuffle=True, random_state=SEED)
print(len(train), len(valid))
# Build the train/validation image augmentation pipelines.
training_transforms, validation_transforms = augmentation()

# Read the Acute Lymphoblastic Leukemia dataset from disk.
trainset = LeukemiaDataset(df_data=train, data_dir=train_dir, transform=training_transforms)
validset = LeukemiaDataset(df_data=valid, data_dir=train_dir, transform=validation_transforms)

train_size, valid_size = len(trainset), len(validset)
print(train_size, valid_size)

# Fix: removed the unused SubsetRandomSampler instances. They were built
# from the pre-split DataFrame indices but never passed to the loaders
# below, which already shuffle (train) / iterate in order (valid).

# Batch the datasets for the network.
train_data_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
valid_data_loader = DataLoader(validset, batch_size=batch_size, shuffle=False)
# Sanity-check the loaders: print tensor shapes for one training batch.
print('Training Set:\n')
for images, labels in train_data_loader:
    print('Image batch dimensions:', images.size())
    print('Image label dimensions:', labels.size())
    break
print("\n")

# Print the label tensor of every validation batch.
# NOTE(review): this loop rebinds the module-level name `labels`, shadowing
# the labels DataFrame read earlier — harmless today since the DataFrame is
# not used after this point, but worth renaming.
for images, labels in valid_data_loader:
    print("The labels: ", labels)
# Instantiate the network. ("Luekemia" spelling matches Classes/Model_2020.)
model = LuekemiaNet()
# Prefer the first CUDA device when available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Move the model's parameters to the selected device.
model.to(device)

# Cross-entropy loss for the two-logit classification task.
criterion = nn.CrossEntropyLoss()
# Adam optimizer (not SGD, despite the original comment); learning rate and
# weight decay come from the classifier config.
optimizer = Adam(params=model.parameters(), lr=learn_rate, weight_decay=decay)
# Step schedule: multiply the learning rate by `gamma` every 7 epochs.
# (A CyclicLR schedule was previously tried here and abandoned.)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7,
                                      gamma=helper.config["classifier"]["model_params"]["gamma"])

# Run the training loop; returns the trained model and its per-epoch
# history (consumed below by plot_training_history).
real_model, history = train_model(model, train_data_loader,
                                  valid_data_loader,
                                  optimizer, scheduler,
                                  criterion, train_size,
                                  valid_size,
                                  device=device, n_epochs=epochs)
# Plot and save the training loss/accuracy curves.
plot_training_history(history, save_path=acc_loss_png)
# Collect predictions and ground-truth labels over the validation set.
y_pred, y_test = get_predictions(real_model, valid_data_loader, device)
# Log per-class precision, recall and F1.
helper.logger.info(classification_report(y_test, y_pred, target_names=class_name))
# Compute the confusion matrix and save its plot via the project helper.
cm = confusion_matrix(y_test, y_pred)
confusion_matrix2(cm, class_name, save_path=confusion_png)

# Explain the model's predictions on the two sample test images using two
# attribution methods each; presumably one image per class, judging by the
# _0/_1 filename suffixes — TODO confirm.
interpret_model(real_model, validation_transforms, image_path, label_idx, use_cpu=True,
                interpret_type="integrated gradients")
interpret_model(real_model, validation_transforms, image_path, label_idx, use_cpu=True, interpret_type="gradient shap")

interpret_model(real_model, validation_transforms, image_path2, label_idx, use_cpu=True,
                interpret_type="integrated gradients")
interpret_model(real_model, validation_transforms, image_path2, label_idx, use_cpu=True, interpret_type="gradient shap")