|
a |
|
b/Classifier/AllPytorch2020.py |
|
|
1 |
import os |
|
|
2 |
import random |
|
|
3 |
|
|
|
4 |
import numpy as np |
|
|
5 |
import pandas as pd |
|
|
6 |
import torch |
|
|
7 |
import torch.optim as optim |
|
|
8 |
import tqdm |
|
|
9 |
from sklearn.metrics import classification_report, confusion_matrix |
|
|
10 |
from sklearn.model_selection import train_test_split |
|
|
11 |
from torch import nn |
|
|
12 |
from torch.optim.adam import Adam |
|
|
13 |
from torch.utils.data import DataLoader |
|
|
14 |
from torch.utils.data.sampler import SubsetRandomSampler |
|
|
15 |
|
|
|
16 |
from Classes.Data import LeukemiaDataset, augmentation |
|
|
17 |
from Classes.Helpers import Helpers |
|
|
18 |
from Classes.interpretability import interpret_model |
|
|
19 |
from Classes.Model_2020 import LuekemiaNet, train_model |
|
|
20 |
from Classes.model_api import (confusion_matrix2, get_predictions, |
|
|
21 |
plot_training_history) |
|
|
22 |
|
|
|
23 |
SEED = 323


def seed_everything(seed=SEED):
    """Seed every source of randomness used by this pipeline.

    Seeds Python's `random`, the interpreter hash seed, NumPy, and PyTorch
    (CPU + CUDA), and forces deterministic cuDNN kernels so training runs
    are reproducible.

    Args:
        seed (int): seed value applied to all RNGs. Defaults to SEED.
    """
    random.seed(seed)
    # BUGFIX: was 'PYHTONHASHSEED' (typo), which set a meaningless env var
    # and left Python hash randomization unpinned.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Safe on CPU-only machines: CUDA seeding is deferred until a device exists.
    torch.cuda.manual_seed(seed)
    # Trade speed for determinism in cuDNN convolution algorithm selection.
    torch.backends.cudnn.deterministic = True
|
|
33 |
|
|
|
34 |
|
|
|
35 |
# Sample test images fed to interpret_model() at the end of the script.
# NOTE(review): absolute, machine-specific paths — consider moving into the
# helper JSON config alongside the other paths.
image_path = '/home/allen/Drive C/Peter Moss AML Leukemia Research/ALL-PyTorch-2020/Classifier/Model/Data/Test/Im047_0.jpg'
image_path2 = '/home/allen/Drive C/Peter Moss AML Leukemia Research/ALL-PyTorch-2020/Classifier/Model/Data/Test/Im006_1.jpg'
# JSON file mapping predicted class indices to label names for interpret_model.
label_idx = '/home/allen/Drive C/Peter Moss AML Leukemia Research/Dataset/pred_label.json'

# Seed all RNGs before any data splitting or training so the run is reproducible.
seed_everything(SEED)
|
|
40 |
# Helper class: exposes the project's JSON configuration and a logger.
helper = Helpers("Test Model", False)
# Directory containing the training images.
train_dir = '/home/allen/Drive C/Peter Moss AML Leukemia Research/Dataset/all_train/'
# CSV listing training image filenames with their labels.
train_csv = '/home/allen/Drive C/Peter Moss AML Leukemia Research/Dataset/train.csv'
# Human-readable class names, index-aligned with the integer labels (0, 1).
class_name = ["zero", "one"]

# Training batch size.
batch_size = helper.config["classifier"]["train"]["batch"]
# Output path for the accuracy/loss training-history plot.
acc_loss_png = helper.config["classifier"]["model_params"]["plot_loss_and_acc"]
# Output path for the confusion-matrix plot.
confusion_png = helper.config["classifier"]["model_params"]["confusion_matrix"]
# Number of training epochs.
epochs = helper.config["classifier"]["train"]["epochs"]
# Learning rate for the Adam optimizer.
learn_rate = helper.config["classifier"]["train"]["learning_rate_adam"]
# Weight decay (L2 regularization) for the Adam optimizer.
decay = helper.config["classifier"]["train"]["decay_adam"]
|
|
61 |
# Load the training labels CSV; expected to have a 'label' column.
labels = pd.read_csv(train_csv)
# Log the per-class sample counts (useful to spot class imbalance).
labels_count = labels.label.value_counts()
print(labels_count)
# Preview the first five rows of the label table.
print(labels.head())
# Stratified 80/20 train/validation split so both sets keep the class ratio.
# NOTE(review): no explicit random_state — reproducibility relies on the
# global numpy seed set by seed_everything(); an explicit random_state=SEED
# would be more robust to call-order changes. TODO confirm before changing.
train, valid = train_test_split(labels, stratify=labels.label, test_size=0.2, shuffle=True)
print(len(train), len(valid))

# Build the train/validation torchvision-style transform pipelines.
training_transforms, validation_transforms = augmentation()
|
|
74 |
|
|
|
75 |
# Wrap the Acute Lymphoblastic Leukemia image data in PyTorch datasets;
# both read images from train_dir, differing only in their transforms.
trainset = LeukemiaDataset(df_data=train, data_dir=train_dir, transform=training_transforms)
validset = LeukemiaDataset(df_data=valid, data_dir=train_dir, transform=validation_transforms)

# Dataset sizes, later passed to train_model for loss/accuracy averaging.
train_size, valid_size = len(trainset), len(validset)
print(train_size, valid_size)

# NOTE(review): these samplers are created but never passed to the
# DataLoaders below (which use shuffle instead) — apparently dead code;
# confirm nothing else relies on them before removing.
train_sampler = SubsetRandomSampler(list(train.index))
valid_sampler = SubsetRandomSampler(list(valid.index))

# Batched loaders: shuffle the training set each epoch, keep validation order fixed.
train_data_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
valid_data_loader = DataLoader(validset, batch_size=batch_size, shuffle=False)
|
|
88 |
|
|
|
89 |
# Sanity-check the loaders by inspecting the first training batch's shapes.
print('Training Set:\n')
# NOTE(review): this loop rebinds `labels` (previously the CSV DataFrame);
# harmless here because the DataFrame is not used again, but confusing.
for images, labels in train_data_loader:
    print('Image batch dimensions:', images.size())
    print('Image label dimensions:', labels.size())
    break
print("\n")

# Print the label tensor of every validation batch (no break — iterates all batches).
for images, labels in valid_data_loader:
    print("The labels: ", labels)
|
|
100 |
|
|
|
101 |
# Instantiate the network (class name 'LuekemiaNet' is the project's spelling).
model = LuekemiaNet()
# Use the first CUDA device if available, else fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Move model parameters to the selected device before building the optimizer.
model.to(device)

# Cross-entropy loss for the two-class classification task.
criterion = nn.CrossEntropyLoss()
# Adam optimizer with config-driven learning rate and weight decay.
# (Previous comment said "stochastic gradient descent" — this is Adam.)
optimizer = Adam(params=model.parameters(), lr=learn_rate, weight_decay=decay)
# scheduler = CyclicLR(optimizer, base_lr=lr, max_lr=0.01, step_size=5, mode='triangular2')
# Decay the learning rate by `gamma` every 7 epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7,
                                      gamma=helper.config["classifier"]["model_params"]["gamma"])
|
|
115 |
|
|
|
116 |
# Train the network; returns the best model and its per-epoch history
# (consumed by plot_training_history below).
real_model, history = train_model(model, train_data_loader,
                                  valid_data_loader,
                                  optimizer, scheduler,
                                  criterion, train_size,
                                  valid_size,
                                  device=device, n_epochs=epochs)
# Save the loss/accuracy curves to the configured PNG path.
plot_training_history(history, save_path=acc_loss_png)
# Collect predictions and ground-truth labels over the validation set.
y_pred, y_test = get_predictions(real_model, valid_data_loader, device)
# Log precision/recall/F1 per class via the helper's logger.
helper.logger.info(classification_report(y_test, y_pred, target_names=class_name))
# Compute and plot the confusion matrix to the configured PNG path.
cm = confusion_matrix(y_test, y_pred)
confusion_matrix2(cm, class_name, save_path=confusion_png)
|
|
132 |
|
|
|
133 |
# Explain the trained model's predictions on two sample images using
# Captum-style attributions (integrated gradients and gradient SHAP);
# use_cpu=True forces attribution on CPU regardless of training device.
interpret_model(real_model, validation_transforms, image_path, label_idx, use_cpu=True,
                interpret_type="integrated gradients")
interpret_model(real_model, validation_transforms, image_path, label_idx, use_cpu=True, interpret_type="gradient shap")

interpret_model(real_model, validation_transforms, image_path2, label_idx, use_cpu=True,
                interpret_type="integrated gradients")
interpret_model(real_model, validation_transforms, image_path2, label_idx, use_cpu=True, interpret_type="gradient shap")
|
|
140 |
|
|
|
141 |
|