Diff of /main.py [000000] .. [a23339]

Switch to unified view

a b/main.py
1
'''
2
About: The main Python script to develop a classification program based on MLP neural networks,
3
       a spectroscopic dataset as the predictor variables, and a 1D histology score dataset as the target variable.
4
Author: Iman Kafian-Attari
5
Date: 20.07.2021
6
Licence: MIT
7
version: 0.1
8
=========================================================
9
How to use:
10
1. Select the output directory.
11
2. Select the training X 2D numpy array.
12
3. Select the test X 2D numpy array.
13
4. Select the training Y 2D numpy array.
14
5. Select the test Y 2D numpy array.
15
=========================================================
16
Notes:
17
1. This code is meant to create a classification problem using the following:
18
   - different MLP neural networks,
19
   - a spectroscopic dataset as predictors,
20
   - a 1D histology score as the target.
21
2. It requires the following inputs from the user:
22
   - an output directory,
23
   - two numpy 2D matrices containing the information on the training and test datasets for the predictor in the form of mxn
24
     where m: number of observations and n: number of predictor variables,
25
   - two numpy 2D matrices containing the information on the training and test datasets for the target in the form of mx1
26
     where m: number of observations and 1: the only target variable,
27
3. It automatically creates the classification problem for four different MLP architectures.
28
4. It stores and plots the performance of each model on the training and test datasets.
29
=========================================================
30
TODO for version 0.2
31
1. Modify the code in a functional form.
32
2. Modify the code to work for any number of target variables.
33
=========================================================
34
'''
35
36
# Show the module docstring (usage instructions and notes) on the console at start-up.
print(__doc__)
37
38
import numpy as np
39
from matplotlib import pyplot as plt
40
from sklearn.metrics import multilabel_confusion_matrix, confusion_matrix, classification_report
41
import pandas as pd
42
43
from tensorflow import keras
44
from keras.models import Sequential
45
from tensorflow.keras.callbacks import EarlyStopping
46
from keras.layers import *
47
from keras.optimizers import *
48
from keras.losses import *
49
from tensorflow.keras.losses import categorical_crossentropy
50
51
from architectures.neural_network_models import neural_model1, neural_model2, neural_model3, neural_model4
52
53
import tkinter as tk
54
from tkinter import filedialog
55
56
# Hidden Tk root window: it only serves as the parent of the dialogs below.
root = tk.Tk()
root.withdraw()


def _ask_and_load(title):
    """Open a file-selection dialog titled *title* and load the chosen file with np.loadtxt."""
    chosen_path = filedialog.askopenfilename(parent=root, initialdir='C:\\', title=title)
    return np.loadtxt(chosen_path)


# Folder that receives the figures and the result tables
output_dir = filedialog.askdirectory(parent=root, initialdir='C:\\', title='Select the output directory')

# Predictors (x) and reference scores (y); the dialogs appear in this exact order
x_train = _ask_and_load('Select the training input file, a 2D numpy array')
x_test = _ask_and_load('Select the test input file, a 2D numpy array')
y_train = _ask_and_load('Select the training output file, a 2D numpy array')
y_test = _ask_and_load('Select the test output file, a 2D numpy array')

# Second name for the test targets as loaded; the script later rebinds y_test to its
# categorical encoding but still needs the raw scores for the confusion matrix.
y_test1 = y_test
67
68
# Reading the range of the categorical histology score.
# The answer is a comma-separated list of integer class labels, e.g. '0,1,2,3,4' -> [0, 1, 2, 3, 4].
raw_labels = input('Please insert the range of labels used for the target score, separated with a comma (,),'
                   ' e.g. 0,1,2,3,4 --> ')

# split() must be called on the user's answer (not on the ',' literal) to obtain one entry per label.
# Blank entries, e.g. from a trailing comma, are skipped; int() tolerates surrounding whitespace.
labels = [int(token) for token in raw_labels.split(',') if token.strip()]
if not labels:
    raise ValueError('No labels were given; expected a comma-separated list such as 0,1,2,3,4')
71
72
# Shape of the training predictors: (number of observations, number of predictor variables)
dim_x_input, dim_y_input = x_train.shape

# One-hot encode the training and test targets, with one column per entry in `labels`
n_classes = len(labels)
y_train = keras.utils.to_categorical(y_train, num_classes=n_classes)
y_test = keras.utils.to_categorical(y_test, num_classes=n_classes)

# Result containers: loss/accuracy lists (one entry appended per model) and
# one report slot per architecture, keyed 'NN1' .. 'NN4'
performance = dict(loss=[], accuracy=[])
architecture_report = {f'NN{idx}': {} for idx in range(1, 5)}
83
84
# Train and evaluate every MLP architecture in turn. Each pass compiles the model, fits it on the
# training data, scores it on the test data, saves a learning-curve figure, prints the confusion
# matrix and stores the classification report of the test predictions.
models = [neural_model1(dim_y_input), neural_model2(dim_y_input), neural_model3(dim_y_input), neural_model4(dim_y_input)]

# The labels typed by the user may still be strings; scikit-learn needs them in the same numeric
# form as the class ids in y_test1 and y_pred, otherwise none of the labels match.
class_labels = [int(label) for label in labels]

for architecture, model in enumerate(models):
    arch_no = architecture + 1  # 1-based number used in messages, file names and report keys

    model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    # validation_data makes Keras record val_loss / val_accuracy for every epoch;
    # without it the 'val_*' keys plotted below do not exist in history.history.
    history = model.fit(x_train, y_train, batch_size=60, epochs=500, verbose=1,
                        validation_data=(x_test, y_test))
    score = model.evaluate(x_test, y_test, verbose=0)

    # Reporting the performance of the model on the test set
    print(f'\n Architecture {arch_no} Performance')
    print(f'Total loss: {score[0]}')
    performance['loss'].append(score[0])
    print(f'Total accuracy: {score[1]*100}')
    performance['accuracy'].append(score[1]*100)

    # Learning curves: accuracy on the left panel, loss on the right panel
    fig, panels = plt.subplots(1, 2, figsize=(12, 6))
    for panel, metric in zip(panels, ('accuracy', 'loss')):
        panel.plot(history.history[metric])
        panel.plot(history.history[f'val_{metric}'])
        panel.legend([f'train {metric}', f'test {metric}'], loc='best')
        panel.set_title(f'model {metric}')
        panel.set_xlabel('epoch')
        panel.set_ylabel(metric)
    fig.suptitle(f'Performance of Architecture {arch_no}')
    # A forward slash is accepted as a path separator on Windows as well as on POSIX systems,
    # whereas a backslash would become part of the file name outside Windows.
    fig.savefig(f'{output_dir}/PerformanceArchitecture{arch_no}.png', dpi=300)
    plt.show(block=False)
    plt.pause(10)  # keep the figure on screen for 10 seconds before moving on
    plt.close(fig)

    # Prediction: the class with the highest output is taken as the predicted score
    y_pred = np.argmax(model.predict(x_test), axis=-1)
    pred_acc = confusion_matrix(y_test1.ravel(), y_pred, labels=class_labels)
    print(pred_acc)
    report = classification_report(y_test1.ravel(), y_pred, labels=class_labels, output_dict=True, zero_division=0)
    architecture_report[f'NN{arch_no}'] = report
    print(report)
127
128
# Summary table of test loss / accuracy, one row per trained architecture.
# The row names are derived from the number of collected results instead of being hard-coded.
model_names = [f'NN{idx}' for idx in range(1, len(performance['loss']) + 1)]
performance = pd.DataFrame(performance, index=model_names)
# A forward slash is a valid path separator on Windows and POSIX alike; a backslash is not portable.
performance.to_csv(f'{output_dir}/ArchitecturePerformance.csv', sep='\t')
print(performance)

# Classification reports: one column per architecture, one row per report entry
architecture_report = pd.DataFrame.from_dict(architecture_report)
architecture_report.to_csv(f'{output_dir}/ArchitectureClassificationReport.csv', sep='\t')
print(architecture_report)