import numpy as np
import matplotlib.pyplot as plt
import scipy.io as spio
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix, f1_score
import random
import time
import os
from datetime import datetime
from sklearn.metrics import cohen_kappa_score
import tensorflow as tf
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import ADASYN
from sklearn.model_selection import train_test_split
from tensorflow.python.layers.core import Dense
from tensorflow.contrib.seq2seq.python.ops import beam_search_decoder
from dataloader import SeqDataLoader
import argparse
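# Sequence-to-sequence sleep stage classifier: a CNN front end extracts per-epoch
# features from raw EEG, an LSTM encoder-decoder (optionally bidirectional, with
# Luong attention) maps sequences of epochs to sequences of sleep stages, and the
# whole model is trained and evaluated with k-fold cross-validation.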
def batch_data(x, y, batch_size):
shuffle = np.random.permutation(len(x))
start = 0
# from IPython.core.debugger import Tracer; Tracer()()
x = x[shuffle]
y = y[shuffle]
while start + batch_size <= len(x):
yield x[start:start+batch_size], y[start:start+batch_size]
start += batch_size
def flatten(name, input_var):
dim = 1
for d in input_var.get_shape()[1:].as_list():
dim *= d
output_var = tf.reshape(input_var,
shape=[-1, dim],
name=name)
return output_var
def build_firstPart_model(input_var,keep_prob_=0.5):
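# Two parallel CNN branches process each raw 30-s epoch: one starting with a small
# kernel (kernel_size=50, capturing fine temporal detail) and one with a large
# kernel (kernel_size=400, capturing coarser, lower-frequency structure); their
# flattened outputs are concatenated at the end of this function.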
# List to store the flattened output of each CNN branch
output_conns = []
######### CNNs with small filter size at the first layer #########
# Convolution
network = tf.layers.conv1d(inputs=input_var, filters=64, kernel_size=50, strides=6,
padding='same', activation=tf.nn.relu)
network = tf.layers.max_pooling1d(inputs=network, pool_size=8, strides=8, padding='same')
# Dropout
network = tf.nn.dropout(network, keep_prob_)
# Convolution
network = tf.layers.conv1d(inputs=network, filters=128, kernel_size=8, strides=1,
padding='same', activation=tf.nn.relu)
network = tf.layers.conv1d(inputs=network, filters=128, kernel_size=8, strides=1,
padding='same', activation=tf.nn.relu)
network = tf.layers.conv1d(inputs=network, filters=128, kernel_size=8, strides=1,
padding='same', activation=tf.nn.relu)
# Max pooling
network = tf.layers.max_pooling1d(inputs=network, pool_size=4, strides=4, padding='same')
# Flatten
network = flatten(name="flat1", input_var=network)
output_conns.append(network)
######### CNNs with large filter size at the first layer #########
# Convolution
network = tf.layers.conv1d(inputs=input_var, filters=64, kernel_size=400, strides=50,
padding='same', activation=tf.nn.relu)
network = tf.layers.max_pooling1d(inputs=network, pool_size=4, strides=4, padding='same')
# Dropout
network = tf.nn.dropout(network, keep_prob_)
# Convolution
network = tf.layers.conv1d(inputs=network, filters=128, kernel_size=6, strides=1,
padding='same', activation=tf.nn.relu)
network = tf.layers.conv1d(inputs=network, filters=128, kernel_size=6, strides=1,
padding='same', activation=tf.nn.relu)
network = tf.layers.conv1d(inputs=network, filters=128, kernel_size=6, strides=1,
padding='same', activation=tf.nn.relu)
# Max pooling
network = tf.layers.max_pooling1d(inputs=network, pool_size=2, strides=2, padding='same')
# Flatten
network = flatten(name="flat2", input_var=network)
output_conns.append(network)
# Concat
network = tf.concat(output_conns,1, name="concat1")
# Dropout
network = tf.nn.dropout(network, keep_prob_)
return network
def plot_attention(attention_map, input_tags = None, output_tags = None):
attn_len = len(attention_map)
# Plot the attention_map
plt.clf()
f = plt.figure(figsize=(15, 10))
ax = f.add_subplot(1, 1, 1)
# Add image
i = ax.imshow(attention_map, interpolation='nearest', cmap='gray')
# Add colorbar
cbaxes = f.add_axes([0.2, 0, 0.6, 0.03])
cbar = f.colorbar(i, cax=cbaxes, orientation='horizontal')
cbar.ax.set_xlabel('Alpha value (Probability output of the "softmax")', labelpad=2)
# Add labels
ax.set_yticks(range(attn_len))
if output_tags is not None:
ax.set_yticklabels(output_tags[:attn_len])
ax.set_xticks(range(attn_len))
if input_tags is not None:
ax.set_xticklabels(input_tags[:attn_len], rotation=45)
ax.set_xlabel('Input Sequence')
ax.set_ylabel('Output Sequence')
# add grid and legend
ax.grid()
HERE = os.path.realpath(os.path.join(os.path.realpath(__file__), '..'))
dir_save = os.path.join(HERE, 'attention_maps')
if not os.path.exists(dir_save):
os.mkdir(dir_save)
f.savefig(os.path.join(dir_save, 'a_map_1.pdf'), bbox_inches='tight')
# f.show()
plt.show()
def build_network(hparams,char2numY,inputs,dec_inputs,keep_prob_=0.5,):
if hparams.akara2017:
_inputs = tf.reshape(inputs, [-1, hparams.input_depth,1])
network = build_firstPart_model(_inputs, keep_prob_)
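# Group the per-epoch CNN features back into sequences: (batch, max_time_step, feature_dim)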
shape = network.get_shape().as_list()
data_input_embed = tf.reshape(network, (-1, hparams.max_time_step, shape[1]))
else:
_inputs = tf.reshape(inputs, [-1, hparams.n_channels, hparams.input_depth // hparams.n_channels])  # '//' keeps the reshape dimension an integer under Python 3
conv1 = tf.layers.conv1d(inputs=_inputs, filters=32, kernel_size=2, strides=1,
padding='same', activation=tf.nn.relu)
max_pool_1 = tf.layers.max_pooling1d(inputs=conv1, pool_size=2, strides=2, padding='same')
conv2 = tf.layers.conv1d(inputs=max_pool_1, filters=64, kernel_size=2, strides=1,
padding='same', activation=tf.nn.relu)
max_pool_2 = tf.layers.max_pooling1d(inputs=conv2, pool_size=2, strides=2, padding='same')
conv3 = tf.layers.conv1d(inputs=max_pool_2, filters=128, kernel_size=2, strides=1,
padding='same', activation=tf.nn.relu)
max_pool_3 = tf.layers.max_pooling1d(inputs=conv3, pool_size=2, strides=2, padding='same')
shape = max_pool_3.get_shape().as_list()
data_input_embed = tf.reshape(max_pool_3, (-1, hparams.max_time_step, shape[1] * shape[2]))
# timesteps = max_time
# lstm_in = tf.unstack(data_input_embed, timesteps, 1)
# lstm_size = 128
# # Get lstm cell output
# # Add LSTM layers
# lstm_cell = tf.contrib.rnn.BasicLSTMCell(lstm_size)
# data_input_embed, states = tf.contrib.rnn.static_rnn(lstm_cell, lstm_in, dtype=tf.float32)
# data_input_embed = tf.stack(data_input_embed, 1)
# shape = data_input_embed.get_shape().as_list()
# embed_size = 10 #128 lstm_size # shape[1]*shape[2]
# Embedding layers
with tf.variable_scope("embeddin") as embedding_scope:
decoder_embedding = tf.Variable(tf.random_uniform((len(char2numY), hparams.embed_size), -1.0, 1.0), name='dec_embedding') # +1 to consider <EOD>
decoder_emb_inputs = tf.nn.embedding_lookup(decoder_embedding, dec_inputs)
with tf.variable_scope("encoding") as encoding_scope:
if not hparams.bidirectional:
# Regular approach with LSTM units
# encoder_cell = tf.contrib.rnn.LSTMCell(hparams.num_units)
# encoder_cell = tf.nn.rnn_cell.MultiRNNCell([encoder_cell] * hparams.lstm_layers)
def lstm_cell():
lstm = tf.contrib.rnn.LSTMCell(hparams.num_units)
return lstm
encoder_cell = tf.contrib.rnn.MultiRNNCell([lstm_cell() for _ in range(hparams.lstm_layers)])
encoder_outputs, encoder_state = tf.nn.dynamic_rnn(encoder_cell, inputs=data_input_embed, dtype=tf.float32)
else:
# Using a bidirectional LSTM architecture instead
# enc_fw_cell = tf.contrib.rnn.LSTMCell(hparams.num_units)
# enc_bw_cell = tf.contrib.rnn.LSTMCell(hparams.num_units)
def lstm_cell():
lstm = tf.contrib.rnn.LSTMCell(hparams.num_units)
return lstm
stacked_cell_fw = tf.contrib.rnn.MultiRNNCell([lstm_cell() for _ in range(hparams.lstm_layers)],state_is_tuple=True)
stacked_cell_bw = tf.contrib.rnn.MultiRNNCell([lstm_cell() for _ in range(hparams.lstm_layers)],state_is_tuple=True)
((enc_fw_out, enc_bw_out), (enc_fw_final, enc_bw_final)) = tf.nn.bidirectional_dynamic_rnn(
cell_fw=stacked_cell_fw,
cell_bw=stacked_cell_bw,
inputs=data_input_embed,
dtype=tf.float32)
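# Concatenate the forward and backward final states layer by layer so the decoder
# (built with 2*num_units cells in the bidirectional case) can be initialized
# directly from the bidirectional encoder state.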
encoder_final_state = []
for layer in range(hparams.lstm_layers):
enc_fin_c = tf.concat((enc_fw_final[layer].c, enc_bw_final[layer].c), 1)
enc_fin_h = tf.concat((enc_fw_final[layer].h, enc_bw_final[layer].h), 1)
encoder_final_state.append(tf.contrib.rnn.LSTMStateTuple(c=enc_fin_c, h=enc_fin_h))
encoder_state = tuple(encoder_final_state)
encoder_outputs = tf.concat((enc_fw_out, enc_bw_out), 2)
with tf.variable_scope("decoding") as decoding_scope:
output_layer = Dense(
len(char2numY), use_bias=False)
decoder_lengths = np.ones((hparams.batch_size), dtype=np.int32) * (hparams.max_time_step+1)
training_helper = tf.contrib.seq2seq.TrainingHelper(decoder_emb_inputs, decoder_lengths)
if not hparams.bidirectional:
# decoder_cell = tf.contrib.rnn.LSTMCell(hparams.num_units)
def lstm_cell():
lstm = tf.contrib.rnn.LSTMCell(hparams.num_units)
return lstm
decoder_cells = tf.contrib.rnn.MultiRNNCell([lstm_cell() for _ in range(hparams.lstm_layers)])
else:
# decoder_cell = tf.contrib.rnn.LSTMCell(2 * hparams.num_units)
def lstm_cell():
lstm = tf.contrib.rnn.LSTMCell(2 * hparams.num_units)
return lstm
decoder_cells = tf.contrib.rnn.MultiRNNCell([lstm_cell() for _ in range(hparams.lstm_layers)])
if hparams.use_attention:
# Create an attention mechanism
attention_mechanism = tf.contrib.seq2seq.LuongAttention(
hparams.num_units * 2 if hparams.bidirectional else hparams.num_units , encoder_outputs,
memory_sequence_length=None)
decoder_cells = tf.contrib.seq2seq.AttentionWrapper(
decoder_cells, attention_mechanism,
attention_layer_size=hparams.attention_size,alignment_history=True)
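# Start decoding from the encoder's final state by cloning it into the
# AttentionWrapper state; alignment_history=True keeps the attention weights so
# they can be plotted later with plot_attention.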
encoder_state = decoder_cells.zero_state(hparams.batch_size, tf.float32).clone(cell_state=encoder_state)
# Basic Decoder and decode
decoder = tf.contrib.seq2seq.BasicDecoder(
decoder_cells, training_helper, encoder_state,
output_layer=output_layer)
dec_outputs, _final_state, _final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(decoder,impute_finished=True)
# dec_outputs, _ = tf.nn.dynamic_rnn(decoder_cell, inputs=decoder_emb_inputs, initial_state=encoder_state)
logits = dec_outputs.rnn_output
# Inference
start_tokens = tf.fill([hparams.batch_size], char2numY['<SOD>'])
end_token = char2numY['<EOD>']
if not hparams.use_beamsearch_decode:
inference_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
decoder_embedding,
start_tokens,end_token)
# Inference Decoder
inference_decoder = tf.contrib.seq2seq.BasicDecoder(
decoder_cells, inference_helper, encoder_state,
output_layer=output_layer)
else:
encoder_state = tf.contrib.seq2seq.tile_batch(encoder_state, multiplier=hparams.beam_width)
decoder_initial_state = decoder_cells.zero_state(hparams.batch_size * hparams.beam_width, tf.float32).clone(cell_state=encoder_state)
inference_decoder = beam_search_decoder.BeamSearchDecoder(cell=decoder_cells,
embedding=decoder_embedding,
start_tokens=start_tokens,
end_token=end_token,
initial_state=decoder_initial_state,
beam_width=hparams.beam_width,
output_layer=output_layer)
# Dynamic decoding
outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
inference_decoder,impute_finished = False, maximum_iterations=hparams.output_max_length)
pred_outputs = outputs.sample_id
if hparams.use_beamsearch_decode:
# [batch_size, max_time_step, beam_width]
pred_outputs = pred_outputs[0]
return logits,pred_outputs,_final_state
def tf_confusion_metrics(y_true_,y_pred_,num_classes=5):
tf_cm = tf.cast(tf.confusion_matrix(y_true_, y_pred_, num_classes=num_classes), "float")
FP = tf.reduce_sum(tf_cm,axis=0) - tf.diag_part(tf_cm)
FN = tf.reduce_sum(tf_cm,axis=1) - tf.diag_part(tf_cm)
TP = tf.diag_part(tf_cm)
TN = tf.reduce_sum(tf_cm) - (FP + FN + TP)
# Sensitivity, hit rate, recall, or true positive rate
TPR = TP / (TP + FN)
# Specificity or true negative rate
TNR = TN / (TN + FP)
# Precision or positive predictive value
PPV = TP / (TP + FP)
# Negative predictive value
NPV = TN / (TN + FN)
# Fall out or false positive rate
FPR = FP / (FP + TN)
# False negative rate
FNR = FN / (TP + FN)
# False discovery rate
FDR = FP / (TP + FP)
return FPR, FNR
def evaluate_metrics(cm,classes):
print ("Confusion matrix:")
print (cm)
cm = cm.astype(np.float32)
FP = cm.sum(axis=0) - np.diag(cm)
FN = cm.sum(axis=1) - np.diag(cm)
TP = np.diag(cm)
TN = cm.sum() - (FP + FN + TP)
# https://stackoverflow.com/questions/31324218/scikit-learn-how-to-obtain-true-positive-true-negative-false-positive-and-fal
# Sensitivity, hit rate, recall, or true positive rate
TPR = TP / (TP + FN)
# Specificity or true negative rate
TNR = TN / (TN + FP)
# Precision or positive predictive value
PPV = TP / (TP + FP)
# Negative predictive value
NPV = TN / (TN + FN)
# Fall out or false positive rate
FPR = FP / (FP + TN)
# False negative rate
FNR = FN / (TP + FN)
# False discovery rate
FDR = FP / (TP + FP)
# Overall accuracy
ACC = (TP + TN) / (TP + FP + FN + TN)
# ACC_micro = (sum(TP) + sum(TN)) / (sum(TP) + sum(FP) + sum(FN) + sum(TN))
ACC_macro = np.mean(ACC)  # macro-average accuracy, so the small classes weigh as much as the large ones
F1 = (2 * PPV * TPR) / (PPV + TPR)
F1_macro = np.mean(F1)
print ("Sample: {}".format(int(np.sum(cm))))
n_classes = len(classes)
for index_ in range(n_classes):
print ("{}: {}".format(classes[index_], int(TP[index_] + FN[index_])))
return ACC_macro,ACC, F1_macro, F1, TPR, TNR, PPV
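# Worked example for evaluate_metrics above (hypothetical 2-class confusion matrix,
# rows = true labels, columns = predictions, 20 samples in total):
#   cm = [[8, 2],
#         [1, 9]]
#   TP = [8, 9], FP = [1, 2], FN = [2, 1], TN = [9, 8]
#   TPR = [0.80, 0.90], PPV = [0.89, 0.82], per-class ACC = [0.85, 0.85]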
random.seed(654)  # fixed seed so the same training/test split is produced on every run
def build_whole_model(hparams,char2numY,inputs, targets,dec_inputs, keep_prob_):
# logits = build_network(inputs,dec_inputs=dec_inputs)
logits, pred_outputs,dec_states = build_network(hparams,char2numY,inputs, dec_inputs, keep_prob_)
decoder_prediction = tf.argmax(logits, 2)
# optimization operation
with tf.name_scope("optimization"):
# Loss function
# L2 weight-decay term over all trainable weights (biases excluded)
train_vars = tf.trainable_variables()
beta = 0.001
lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in train_vars
if 'bias' not in v.name]) * beta
# class_ratio = [0.1,0.4, 0.1, 0.1, 0.1, 0.1,0.1]
# class_weight = tf.constant(class_ratio)
# weighted_logits = tf.multiply(logits, class_weight)
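# Class-balanced sequence loss: for each class i, build a 0/1 weight mask over the
# positions whose target label equals i and accumulate the masked sequence_loss
# terms, so every class contributes to the objective regardless of its frequency.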
loss_is = []
for i in range(logits.get_shape().as_list()[-1]):
class_fill_targets = tf.fill(tf.shape(targets), i)
weights_i = tf.cast(tf.equal(targets, class_fill_targets), "float")
loss_is.append(tf.contrib.seq2seq.sequence_loss(logits, targets, weights_i,average_across_batch=False))
loss = tf.reduce_sum(loss_is,axis=0)
# loss = tf.contrib.seq2seq.sequence_loss(logits, targets, tf.ones([hparams.batch_size, hparams.max_time_step+1])) #+1 is because of the <EOD> token
# Optimizer
loss = tf.reduce_mean(loss)+lossL2
optimizer = tf.train.RMSPropOptimizer(1e-3).minimize(loss)
return logits, pred_outputs, loss, optimizer,dec_states
def run_program(hparams,FLAGS):
# load dataset
num_folds = FLAGS.num_folds
data_dir = FLAGS.data_dir
if '13' in data_dir:
data_version = 2013
else:
n_oversampling = 30000
data_version = 2018
output_dir = FLAGS.output_dir
classes = FLAGS.classes
n_classes = len(classes)
path, channel_ename = os.path.split(data_dir)
traindata_dir = os.path.join(os.path.abspath(os.path.join(data_dir, os.pardir)),'traindata/')
print(str(datetime.now()))
def evaluate_model(hparams, X_test, y_test, classes):
acc_track = []
n_classes = len(classes)
y_true = []
y_pred = []
alignments_alphas_all = [] # (batch_num,B,max_time_step,max_time_step)
for batch_i, (source_batch, target_batch) in enumerate(batch_data(X_test, y_test, hparams.batch_size)):
# if source_batch.shape[1] != hparams.max_time_step:
# print ("Num of steps is: ", source_batch.shape[1])
# try:
pred_outputs_ = sess.run(pred_outputs,
feed_dict={inputs: source_batch, keep_prob_: 1.0})
alignments_alphas = sess.run(dec_states.alignment_history.stack(),
feed_dict={inputs: source_batch, dec_inputs: target_batch[:, :-1],
keep_prob_: 1.0})
# acc_track.append(np.mean(dec_input == target_batch))
pred_outputs_ = pred_outputs_[:, :hparams.max_time_step] # remove the last prediction <EOD>
target_batch_ = target_batch[:, 1:-1] # remove the last <EOD> and the first <SOD>
acc_track.append(pred_outputs_ == target_batch_)
alignments_alphas = alignments_alphas.transpose((1, 0, 2))
alignments_alphas = alignments_alphas[:, :hparams.max_time_step]
alignments_alphas_all.append(alignments_alphas)
_y_true = target_batch_.flatten()
_y_pred = pred_outputs_.flatten()
y_true.extend(_y_true)
y_pred.extend(_y_pred)
cm = confusion_matrix(y_true, y_pred, labels=range(n_classes))
ck_score = cohen_kappa_score(y_true, y_pred)
acc_avg, acc, f1_macro, f1, sensitivity, specificity, PPV = evaluate_metrics(cm, classes)
# print ("batch_i: {}").format(batch_i)
print(
'Average Accuracy -> {:>6.4f}, Macro F1 -> {:>6.4f} and Cohen\'s Kappa -> {:>6.4f} on test set'.format(acc_avg,
f1_macro,
ck_score))
for index_ in range(n_classes):
print(
"\t{} rhythm -> Sensitivity: {:1.4f}, Specificity: {:1.4f}, Precision (PPV): {:1.4f}, F1 : {:1.4f} Accuracy: {:1.4f}".format(
classes[index_],
sensitivity[
index_],
specificity[
index_], PPV[index_], f1[index_],
acc[index_]))
print(
"\tAverage -> Sensitivity: {:1.4f}, Specificity: {:1.4f}, Precision (PPV): {:1.4f}, F1-score: {:1.4f}, Accuracy: {:1.4f}".format(
np.mean(sensitivity), np.mean(specificity), np.mean(PPV), np.mean(f1), np.mean(acc)))
return acc_avg, f1_macro, ck_score, y_true, y_pred, alignments_alphas_all
def count_parameters():
print ('# of Params: ', np.sum([np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()]))
# folds = [4,5,6,7]
# # folds = [8,9,10,11]
# # folds = [12,13,14,15]
# # folds = [16,17,18,19]
# folds = [8]
# for fold_idx in folds:
for fold_idx in range(num_folds):
start_time_fold_i = time.time()
data_loader = SeqDataLoader(data_dir, num_folds, fold_idx, classes=classes)
X_train, y_train, X_test, y_test = data_loader.load_data(seq_len=hparams.max_time_step)
# preprocessing
char2numY = dict(zip(classes, range(len(classes))))
pre_f1_macro = 0
# <SOD> is a token to show start of decoding and <EOD> is a token to indicate end of decoding
char2numY['<SOD>'] = len(char2numY)
char2numY['<EOD>'] = len(char2numY)
num2charY = dict(zip(char2numY.values(), char2numY.keys()))
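# With the default classes ['W', 'N1', 'N2', 'N3', 'REM'] this gives
# char2numY = {'W': 0, 'N1': 1, 'N2': 2, 'N3': 3, 'REM': 4, '<SOD>': 5, '<EOD>': 6},
# and num2charY is the inverse mapping.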
# over-sampling: SMOTE:
X_train = np.reshape(X_train,[X_train.shape[0]*X_train.shape[1],-1])
y_train= y_train.flatten()
if data_version == 2018:
# for the 2018 dataset, undersample the over-represented W and N2 stages
under_sample_len = 35000#30000
Ws = np.where(y_train == char2numY['W'])[0]
len_W = len(np.where(y_train == char2numY['W'])[0])
permute = np.random.permutation(len_W)
len_r = len_W - under_sample_len if (len_W - under_sample_len) > 0 else 0
permute = permute[:len_r]
y_train = np.delete(y_train,Ws[permute],axis =0)
X_train = np.delete(X_train,Ws[permute],axis =0)
under_sample_len = 35000 #40000
N2s = np.where(y_train == char2numY['N2'])[0]
len_N2 = len(np.where(y_train == char2numY['N2'])[0])
permute = np.random.permutation(len_N2)
len_r = len_N2 - under_sample_len if (len_N2 - under_sample_len) > 0 else 0
permute = permute[:len_r]
y_train = np.delete(y_train, N2s[permute],axis =0)
X_train = np.delete(X_train, N2s[permute],axis =0)
nums = []
for cl in classes:
nums.append(len(np.where(y_train == char2numY[cl])[0]))
if not os.path.exists(traindata_dir):
os.mkdir(traindata_dir)
fname = os.path.join(traindata_dir,'trainData_'+channel_ename+'_SMOTE_all_10s_f'+str(fold_idx)+'.npz')
if os.path.isfile(fname):
X_train, y_train,_ = data_loader.load_npz_file(fname)
else:
if data_version == 2013:
n_osamples = nums[2] - 7000
ratio = {0: n_osamples if nums[0] < n_osamples else nums[0], 1: n_osamples if nums[1] < n_osamples else nums[1],
2: nums[2], 3: n_osamples if nums[3] < n_osamples else nums[3], 4: n_osamples if nums[4] < n_osamples else nums[4]}
if data_version==2018:
ratio = {0: n_oversampling if nums[0] < n_oversampling else nums[0], 1: n_oversampling if nums[1] < n_oversampling else nums[1], 2: nums[2],
3: n_oversampling if nums[3] < n_oversampling else nums[3], 4: n_oversampling if nums[4] < n_oversampling else nums[4]}
# ratio = {0: 40000 if nums[0] < 40000 else nums[0], 1: 27000 if nums[1] < 27000 else nums[1], 2: nums[2],
# 3: 30000 if nums[3] < 30000 else nums[3], 4: 27000 if nums[4] < 27000 else nums[4]}
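# The SMOTE 'ratio' dict maps each class index to its target sample count after
# oversampling: the majority class (N2, index 2) is left unchanged and the minority
# classes are synthesized up to the target count (n_osamples / n_oversampling).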
sm = SMOTE(random_state=12,ratio=ratio)
# sm = SMOTE(random_state=12, ratio=ratio)
# sm = RandomUnderSampler(random_state=12,ratio=ratio)
X_train, y_train = sm.fit_sample(X_train, y_train)
data_loader.save_to_npz_file(X_train, y_train,data_loader.sampling_rate,fname)
X_train = X_train[:(X_train.shape[0] // hparams.max_time_step) * hparams.max_time_step, :]
y_train = y_train[:(X_train.shape[0] // hparams.max_time_step) * hparams.max_time_step]
X_train = np.reshape(X_train,[-1,X_test.shape[1],X_test.shape[2]])
y_train = np.reshape(y_train,[-1,y_test.shape[1],])
# shuffle training data_2013
permute = np.random.permutation(len(y_train))
X_train = np.asarray(X_train)
X_train = X_train[permute]
y_train = y_train[permute]
# add '<SOD>' to the beginning of each label sequence, and '<EOD>' to the end of each label sequence (both for training and test sets)
y_train = [[char2numY['<SOD>']] + list(seq) + [char2numY['<EOD>']] for seq in y_train]
y_train = np.array(y_train)
y_test = [[char2numY['<SOD>']] + list(seq) + [char2numY['<EOD>']] for seq in y_test]
y_test = np.array(y_test)
print ('The training set after oversampling: ', classes)
for cl in classes:
print (cl, len(np.where(y_train==char2numY[cl])[0]))
# training and testing the model
if not os.path.exists(FLAGS.checkpoint_dir):
os.mkdir(FLAGS.checkpoint_dir)
if not os.path.exists(output_dir):
os.makedirs(output_dir)
loss_track = []
with tf.Graph().as_default(), tf.Session() as sess:
# Placeholders
inputs = tf.placeholder(tf.float32, [None, hparams.max_time_step, hparams.input_depth], name='inputs')
targets = tf.placeholder(tf.int32, (None, None), 'targets')
dec_inputs = tf.placeholder(tf.int32, (None, None), 'decoder_inputs')
keep_prob_ = tf.placeholder(tf.float32, name='keep')
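# inputs: raw EEG sequences (batch, max_time_step, input_depth samples per epoch);
# targets / dec_inputs: stage label sequences shifted by one position for teacher
# forcing; keep_prob_: dropout keep probability (set to 1.0 at test time).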
# model
logits, pred_outputs, loss, optimizer,dec_states = build_whole_model(hparams,char2numY,inputs,targets, dec_inputs, keep_prob_)
count_parameters()
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
saver = tf.train.Saver()
print(str(datetime.now()))
# ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
ckpt_name = "model_fold{:02d}.ckpt".format(fold_idx)
ckpt_exist = False
for file in os.listdir(FLAGS.checkpoint_dir):
if file.startswith(ckpt_name):
ckpt_exist=True
ckpt_name = os.path.join(FLAGS.checkpoint_dir, ckpt_name)
# if ckpt and ckpt.model_checkpoint_path:
# if os.path.isfile(ckpt_name):
if ckpt_exist:
# # Restore
# ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
# saver.restore(session, os.path.join(checkpoint_dir, ckpt_name))
# saver.restore(sess, tf.train.latest_checkpoint(FLAGS.checkpoint_dir))
saver.restore(sess, ckpt_name)
# or 'load meta graph' and restore weights
# saver = tf.train.import_meta_graph(ckpt_name+".meta")
# saver.restore(session,tf.train.latest_checkpoint(checkpoint_dir))
evaluate_model(hparams,X_test, y_test, classes)
else:
for epoch_i in range(hparams.epochs):
start_time = time.time()
# train_acc = []
y_true = []
y_pred =[]
for batch_i, (source_batch, target_batch) in enumerate(batch_data(X_train, y_train, hparams.batch_size)):
# _, batch_loss, batch_logits, alignments_alphas = sess.run([optimizer, loss, logits,dec_states.alignment_history.stack()],
# feed_dict = {inputs: source_batch,
# dec_inputs: target_batch[:, :-1],
# targets: target_batch[:, 1:],keep_prob_: 0.5} #,
# )
_, batch_loss, batch_logits = sess.run([optimizer, loss, logits],
feed_dict = {inputs: source_batch,
dec_inputs: target_batch[:, :-1],
targets: target_batch[:, 1:],keep_prob_: 0.5} #,
)
loss_track.append(batch_loss)
# alignments_alphas = alignments_alphas.transpose((1, 0, 2))
# alignments_alphas = alignments_alphas[:, :hparams.max_time_step]
# train_acc.append(batch_logits.argmax(axis=-1) == target_batch[:,1:])
y_pred_ = batch_logits[:, :hparams.max_time_step].argmax(axis=-1)
y_true_ = target_batch[:, 1:-1]
# input_tags - word representation of input sequence, use None to skip
# output_tags - word representation of output sequence, use None to skip
# i - index of input element in batch
# input_tags = [[num2charY[i] for i in seq] for seq in y_true_]
# output_tags = [[num2charY[i] for i in seq] for seq in y_pred_]
# plot_attention(alignments_alphas[1, :, :], input_tags[1], output_tags[1])
y_true.extend(y_true_)
y_pred.extend(y_pred_)
# accuracy = np.mean(train_acc)
y_true = np.asarray(y_true)
y_pred = np.asarray(y_pred)
y_true = y_true.flatten()
y_pred = y_pred.flatten()
n_examples = len(y_true)
cm = confusion_matrix(y_true, y_pred,labels=range(len(char2numY)-2))
accuracy = np.mean(y_true == y_pred)
mf1 = f1_score(y_true, y_pred, average="macro")
ck_score = cohen_kappa_score(y_true, y_pred)
print('Epoch {:3} Loss: {:>6.3f} Accuracy: {:>6.4f} F1-score: {:>6.4f} Cohen\'s Kappa: {:>6.4f} Epoch duration: {:>6.3f}s'.format(epoch_i, np.mean(batch_loss),
accuracy,mf1,ck_score, time.time() - start_time))
if (epoch_i+1)%hparams.test_step==0:
acc_avg, f1_macro,ck_score, y_true, y_pred,alignments_alphas_all = evaluate_model(hparams,X_test, y_test,classes)
if np.nan_to_num(f1_macro) > pre_f1_macro: # save the better model based on the f1 score
print('Loss {:.4f} after {} epochs (batch_size={})'.format(loss_track[-1], epoch_i + 1,
hparams.batch_size))
pre_f1_macro = f1_macro
ckpt_name = "model_fold{:02d}.ckpt".format(fold_idx)
save_path = os.path.join(FLAGS.checkpoint_dir, ckpt_name)
saver.save(sess, save_path)
print("The best model (till now) saved in path: %s" % save_path)
# Save
save_dict = {
"y_true": y_true,
"y_pred": y_pred,
"ck_score": ck_score,
"alignments_alphas_all":alignments_alphas_all[:200],# we save just the first 200 batch results because it is so huge
}
filename = "output_"+channel_ename+"_fold{:02d}.npz".format(fold_idx)
save_path = os.path.join(output_dir, filename)
np.savez(save_path, **save_dict)
print("The best results (till now) saved in path: %s" % save_path)
# plt.plot(loss_track)
# plt.show()
# print 'Classes: ', classes
print(str(datetime.now()))
print ('Fold{} took: {:>6.3f}s'.format(fold_idx, time.time()-start_time_fold_i))
def main(args=None):
FLAGS = tf.app.flags.FLAGS
# outputs_eeg_fpz_cz
tf.app.flags.DEFINE_string('data_dir', 'data_2013/eeg_fpz_cz',
"""Directory where to load training data_2013.""")
tf.app.flags.DEFINE_string('output_dir', 'outputs_2013/outputs_eeg_fpz_cz',
"""Directory where to save trained models """
"""and outputs.""")
tf.app.flags.DEFINE_integer('num_folds', 20,
"""Number of cross-validation folds.""")
tf.app.flags.DEFINE_list('classes', ['W', 'N1', 'N2', 'N3', 'REM'], """classes""")
tf.app.flags.DEFINE_string('checkpoint_dir', 'checkpoints-seq2seq-sleep-EDF', """Directory to save checkpoints""")
# tf.app.flags.DEFINE_string('ckpt_name', 'seq2seq_sleep.ckpt',"""Check point name""")
# hyperparameters
hparams = tf.contrib.training.HParams(
epochs=120, # 300
batch_size=20, # 10
num_units=128,
embed_size=10,
input_depth=3000,
n_channels=100,
bidirectional=False,
use_attention=True,
lstm_layers=2,
attention_size=64,
beam_width=4,
use_beamsearch_decode=False,
max_time_step=10,  # sequence length in epochs; other values tried: 3, 5, 40, 100
output_max_length=10 + 2,  # max_time_step + 2, leaving room for the <SOD>/<EOD> tokens
akara2017=True,
test_step=5,  # evaluate on the test set every 5 epochs
)
# classes = ['W', 'N1', 'N2', 'N3', 'REM']
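# Example invocation (the script file name below is an assumption; the flags are
# the ones defined above):
#   python seq2seq_sleep_sleep-EDF.py --data_dir data_2013/eeg_fpz_cz \
#       --output_dir outputs_2013/outputs_eeg_fpz_cz --num_folds 20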
run_program(hparams,FLAGS)
if __name__ == "__main__":
tf.app.run()