--- a
+++ b/main.py
@@ -0,0 +1,134 @@
+'''
+About: The main Python script to develop a classification program based on MLP neural networks,
+       a spectroscopic dataset as the predictor variables, and a 1D histology score dataset as the target variable.
+Author: Iman Kafian-Attari
+Date: 20.07.2021
+Licence: MIT
+version: 0.1
+=========================================================
+How to use:
+1. Select the output directory.
+2. Select the training X 2D numpy array.
+3. Select the test X 2D numpy array.
+4. Select the training Y 2D numpy array.
+5. Select the test Y 2D numpy array.
+6. Enter the range of target labels, separated with commas, when prompted.
+=========================================================
+Notes:
+1. This code is meant to create a classification problem using the following:
+   - different MLP neural networks,
+   - a spectroscopic dataset as predictors,
+   - a 1D histology score as the target.
+2. It requires the following inputs from the user:
+   - an output directory,
+   - two numpy 2D matrices containing the information on the training and test datasets for the predictor in the form of mxn
+     where m: number of observations and n: number of predictor variables,
+   - two numpy 2D matrices containing the information on the training and test datasets for the target in the form of mx1
+     where m: number of observations and 1: the only target variable.
+3. It automatically creates the classification problem for four different MLP architectures.
+4. It stores and plots the performance of each model on the training and test datasets.
+=========================================================
+TODO for version 0.2
+1. Refactor the code into a functional form.
+2. Modify the code to work for any number of target variables.
+=========================================================
+'''
+
+# Print the module docstring (usage notes) when the script starts
+print(__doc__)
+
+import os
+import tkinter as tk
+from tkinter import filedialog
+
+import numpy as np
+from matplotlib import pyplot as plt
+from sklearn.metrics import multilabel_confusion_matrix, confusion_matrix, classification_report
+import pandas as pd
+
+from tensorflow import keras
+from keras.models import Sequential
+from tensorflow.keras.callbacks import EarlyStopping
+from keras.layers import *
+from keras.optimizers import *
+from keras.losses import *
+from tensorflow.keras.losses import categorical_crossentropy
+
+from architectures.neural_network_models import neural_model1, neural_model2, neural_model3, neural_model4
+
+# Hidden Tk root window that owns the selection dialogs below
+root = tk.Tk()
+root.withdraw()
+
+output_dir = filedialog.askdirectory(parent=root, initialdir='C:\\', title='Select the output directory')
+
+# Reading the predictors and references, one file dialog per array, in the
+# order: training X, test X, training Y, test Y
+dialog_titles = (
+    'Select the training input file, a 2D numpy array',
+    'Select the test input file, a 2D numpy array',
+    'Select the training output file, a 2D numpy array',
+    'Select the test output file, a 2D numpy array',
+)
+x_train, x_test, y_train, y_test = (
+    np.loadtxt(filedialog.askopenfilename(parent=root, initialdir='C:\\', title=dialog_title))
+    for dialog_title in dialog_titles
+)
+# Keep a handle on the raw test scores; y_test itself is re-bound to its
+# one-hot encoded form further down
+y_test1 = y_test
+
+# Reading the range of categorical histology score.
+# The answer is split on commas and every entry is converted to an integer so
+# that the labels compare equal to numeric class indices; blank entries (for
+# example from a trailing comma) are ignored. A non-numeric entry raises
+# ValueError.
+labels_text = input('Please insert the range of labels used for the target score, separated with a comma (,),'
+                    ' e.g. 0,1,2,3,4 --> ')
+labels = [int(token) for token in labels_text.split(',') if token.strip()]
+
+# Shape of the training predictors (observations x predictor variables)
+dim_x_input, dim_y_input = x_train.shape
+
+# One-hot encode both target arrays, with one class column per entry in `labels`
+y_train, y_test = (
+    keras.utils.to_categorical(scores, len(labels))
+    for scores in (y_train, y_test)
+)
+
+# Containers for the results of the NN models: the test loss/accuracy of each
+# model, and one (initially empty) classification report per architecture
+performance = dict(loss=[], accuracy=[])
+architecture_report = {f'NN{number}': {} for number in range(1, 5)}
+
+# Compiling, fitting and evaluating each of the four neural network architectures
+models = [neural_model1(dim_y_input), neural_model2(dim_y_input), neural_model3(dim_y_input), neural_model4(dim_y_input)]
+for number, model in enumerate(models, start=1):
+    model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
+    # validation_data makes Keras record 'val_loss' and 'val_accuracy' for
+    # every epoch; the plots below read those keys from history.history
+    history = model.fit(x_train, y_train, batch_size=60, epochs=500, verbose=1,
+                        validation_data=(x_test, y_test))
+    score = model.evaluate(x_test, y_test, verbose=0)
+
+    # Reporting and storing the loss and accuracy on the test set
+    print(f'\n Architecture {number} Performance')
+    print(f'Total loss: {score[0]}')
+    performance['loss'].append(score[0])
+    print(f'Total accuracy: {score[1]*100}')
+    performance['accuracy'].append(score[1]*100)
+
+    # Visualizing the performance of the model: accuracy on the left panel,
+    # loss on the right panel, each with its train and test curve
+    fig = plt.figure(figsize=(12, 6))
+    for position, metric in enumerate(('accuracy', 'loss'), start=1):
+        plt.subplot(1, 2, position)
+        plt.plot(history.history[metric])
+        plt.plot(history.history[f'val_{metric}'])
+        plt.legend([f'train {metric}', f'test {metric}'], loc='best')
+        plt.title(f'model {metric}')
+        plt.xlabel('epoch')
+        plt.ylabel(metric)
+    plt.suptitle(f'Performance of Architecture {number}')
+    # os.path.join keeps the output path valid on every operating system
+    plt.savefig(os.path.join(output_dir, f'PerformanceArchitecture{number}.png'), dpi=300)
+    # Show the figure for 10 seconds without blocking, then close it
+    plt.show(block=False)
+    plt.pause(10)
+    plt.close()
+
+    # Prediction: the class index with the highest output is the predicted score
+    y_pred = np.argmax(model.predict(x_test), axis=-1)
+    pred_acc = confusion_matrix(y_test1.ravel(), y_pred, labels=list(labels))
+    print(pred_acc)
+    report = classification_report(y_test1.ravel(), y_pred, labels=list(labels), output_dict=True, zero_division=0)
+    architecture_report[f'NN{number}'] = report
+    print(report)
+
+# Save and print the test loss/accuracy of all architectures as a
+# tab-separated table; os.path.join keeps the path valid on every OS
+performance = pd.DataFrame(performance, index=['NN1', 'NN2', 'NN3', 'NN4'])
+performance.to_csv(os.path.join(output_dir, 'ArchitecturePerformance.csv'), sep='\t')
+print(performance)
+
+# Save and print the classification reports, one column per architecture
+architecture_report = pd.DataFrame.from_dict(architecture_report)
+architecture_report.to_csv(os.path.join(output_dir, 'ArchitectureClassificationReport.csv'), sep='\t')
+print(architecture_report)