b/tf2/seq_seq_annot_aami.py
+import numpy as np
+import matplotlib.pyplot as plt
+import scipy.io as spio
+from  sklearn.preprocessing import MinMaxScaler
+import random
+import time
+import os
+from datetime import datetime
+from sklearn.metrics import confusion_matrix
+import tensorflow as tf
+import tensorflow_addons as tfa
+from imblearn.over_sampling import SMOTE
+from sklearn.model_selection import train_test_split
+import argparse
+random.seed(654)
+def read_mitbih(filename, max_time=100, classes= ['F', 'N', 'S', 'V', 'Q'], max_nlabel=100):
+    def normalize(data):
+        data = np.nan_to_num(data)  # removing NaNs and Infs
+        data = data - np.mean(data)
+        data = data / np.std(data)
+        return data
+    # read data
+    data = []
+    samples = spio.loadmat(filename + ".mat")
+    samples = samples['s2s_mitbih']
+    values = samples[0]['seg_values']
+    labels = samples[0]['seg_labels']
+    num_annots = sum([item.shape[0] for item in values])
+    n_seqs = num_annots / max_time
+    #  add all segments(beats) together
+    l_data = 0
+    for i, item in enumerate(values):
+        l = item.shape[0]
+        for itm in item:
+            if l_data == n_seqs * max_time:
+                break
+            data.append(itm[0])
+            l_data = l_data + 1
+    #  add all labels together
+    l_lables  = 0
+    t_lables = []
+    for i, item in enumerate(labels):
+        if len(t_lables)==n_seqs*max_time:
+            break
+        item= item[0]
+        for lebel in item:
+            if l_lables == n_seqs * max_time:
+                break
+            t_lables.append(str(lebel))
+            l_lables = l_lables + 1
+    del values
+    data = np.asarray(data)
+    shape_v = data.shape
+    data = np.reshape(data, [shape_v[0], -1])
+    t_lables = np.array(t_lables)
+    _data  = np.asarray([],dtype=np.float64).reshape(0,shape_v[1])
+    _labels = np.asarray([],dtype=np.dtype('|S1')).reshape(0,)
+    for cl in classes:
+        _label = np.where(t_lables == cl)
+        permute = np.random.permutation(len(_label[0]))
+        _label = _label[0][permute[:max_nlabel]]
+        # _label = _label[0][:max_nlabel]
+        # permute = np.random.permutation(len(_label))
+        # _label = _label[permute]
+        _data = np.concatenate((_data, data[_label]))
+        _labels = np.concatenate((_labels, t_lables[_label]))
+    data = _data[:int(len(_data)/ max_time) * max_time, :]
+    _labels = _labels[:int(len(_data) / max_time) * max_time]
+    # data = _data
+    #  split data into sublist of 100=se_len values
+    data = [data[i:i + max_time] for i in range(0, len(data), max_time)]
+    labels = [_labels[i:i + max_time] for i in range(0, len(_labels), max_time)]
+    # shuffle
+    permute = np.random.permutation(len(labels))
+    data = np.asarray(data)
+    labels = np.asarray(labels)
+    data= data[permute]
+    labels = labels[permute]
+    print('Records processed!')
+    return data, labels
+def evaluate_metrics(confusion_matrix):
+    # https://stackoverflow.com/questions/31324218/scikit-learn-how-to-obtain-true-positive-true-negative-false-positive-and-fal
+    FP = confusion_matrix.sum(axis=0) - np.diag(confusion_matrix)
+    FN = confusion_matrix.sum(axis=1) - np.diag(confusion_matrix)
+    TP = np.diag(confusion_matrix)
+    TN = confusion_matrix.sum() - (FP + FN + TP)
+    # Sensitivity, hit rate, recall, or true positive rate
+    TPR = TP / (TP + FN)
+    # Specificity or true negative rate
+    TNR = TN / (TN + FP)
+    # Precision or positive predictive value
+    PPV = TP / (TP + FP)
+    # Negative predictive value
+    NPV = TN / (TN + FN)
+    # Fall out or false positive rate
+    FPR = FP / (FP + TN)
+    # False negative rate
+    FNR = FN / (TP + FN)
+    # False discovery rate
+    FDR = FP / (TP + FP)
+    # Overall accuracy
+    ACC = (TP + TN) / (TP + FP + FN + TN)
+    # ACC_micro = (sum(TP) + sum(TN)) / (sum(TP) + sum(FP) + sum(FN) + sum(TN))
+    ACC_macro = np.mean(ACC) # to get a sense of effectiveness of our method on the small classes we computed this average (macro-average)
+    return ACC_macro, ACC, TPR, TNR, PPV
+def batch_data(x, y, batch_size):
+    shuffle = np.random.permutation(len(x))
+    start = 0
+    #     from IPython.core.debugger import Tracer; Tracer()()
+    x = x[shuffle]
+    y = y[shuffle]
+    while start + batch_size <= len(x):
+        yield x[start:start + batch_size], y[start:start + batch_size]
+        start += batch_size
+def build_network(inputs, dec_inputs,char2numY,n_channels=10,input_depth=280,num_units=128,max_time=10,bidirectional=False):
+    _inputs = tf.reshape(inputs, [-1, n_channels, int(input_depth / n_channels)])
+    # _inputs = tf.reshape(inputs, [-1,input_depth,n_channels])
+    # #(batch*max_time, 280, 1) --> (N, 280, 18)
+    conv1 = tf.compat.v1.layers.conv1d(inputs=_inputs, filters=32, kernel_size=2, strides=1,
+                             padding='same', activation=tf.nn.relu)
+    max_pool_1 = tf.compat.v1.layers.max_pooling1d(inputs=conv1, pool_size=2, strides=2, padding='same')
+    conv2 = tf.compat.v1.layers.conv1d(inputs=max_pool_1, filters=64, kernel_size=2, strides=1,
+                             padding='same', activation=tf.nn.relu)
+    max_pool_2 = tf.compat.v1.layers.max_pooling1d(inputs=conv2, pool_size=2, strides=2, padding='same')
+    conv3 = tf.compat.v1.layers.conv1d(inputs=max_pool_2, filters=128, kernel_size=2, strides=1,
+                             padding='same', activation=tf.nn.relu)
+    shape = conv3.get_shape().as_list()
+    data_input_embed = tf.reshape(conv3, (-1, max_time, shape[1] * shape[2]))
+    # timesteps = max_time
+    #
+    # lstm_in = tf.unstack(data_input_embed, timesteps, 1)
+    # lstm_size = 128
+    # # Get lstm cell output
+    # # Add LSTM layers
+    # lstm_cell = tf.contrib.rnn.BasicLSTMCell(lstm_size)
+    # data_input_embed, states = tf.contrib.rnn.static_rnn(lstm_cell, lstm_in, dtype=tf.float32)
+    # data_input_embed = tf.stack(data_input_embed, 1)
+    # shape = data_input_embed.get_shape().as_list()
+    embed_size = 10  # 128 lstm_size # shape[1]*shape[2]
+    # Embedding layers
+    output_embedding = tf.Variable(tf.random.uniform((len(char2numY), embed_size), -1.0, 1.0), name='dec_embedding')
+    data_output_embed = tf.nn.embedding_lookup(params=output_embedding, ids=dec_inputs)
+    with tf.compat.v1.variable_scope("encoding") as encoding_scope:
+        if not bidirectional:
+            # Regular approach with LSTM units
+            lstm_enc = tf.compat.v1.nn.rnn_cell.LSTMCell(num_units)
+            _, last_state = tf.compat.v1.nn.dynamic_rnn(lstm_enc, inputs=data_input_embed, dtype=tf.float32)
+        else:
+            # Using a bidirectional LSTM architecture instead
+            enc_fw_cell = tf.compat.v1.nn.rnn_cell.LSTMCell(num_units)
+            enc_bw_cell = tf.compat.v1.nn.rnn_cell.LSTMCell(num_units)
+            ((enc_fw_out, enc_bw_out), (enc_fw_final, enc_bw_final)) = tf.compat.v1.nn.bidirectional_dynamic_rnn(
+                cell_fw=enc_fw_cell,
+                cell_bw=enc_bw_cell,
+                inputs=data_input_embed,
+                dtype=tf.float32)
+            enc_fin_c = tf.concat((enc_fw_final.c, enc_bw_final.c), 1)
+            enc_fin_h = tf.concat((enc_fw_final.h, enc_bw_final.h), 1)
+            last_state = tf.nn.rnn_cell.LSTMStateTuple(c=enc_fin_c, h=enc_fin_h)
+    with tf.compat.v1.variable_scope("decoding") as decoding_scope:
+        if not bidirectional:
+            lstm_dec = tf.compat.v1.nn.rnn_cell.LSTMCell(num_units)
+        else:
+            lstm_dec = tf.compat.v1.nn.rnn_cell.LSTMCell(2 * num_units)
+        dec_outputs, _ = tf.compat.v1.nn.dynamic_rnn(lstm_dec, inputs=data_output_embed, initial_state=last_state)
+    logits = tf.compat.v1.layers.dense(dec_outputs, units=len(char2numY), use_bias=True)
+    return logits
+def str2bool(v):
+    if v.lower() in ('yes', 'true', 't', 'y', '1'):
+        return True
+    elif v.lower() in ('no', 'false', 'f', 'n', '0'):
+        return False
+    else:
+        raise argparse.ArgumentTypeError('Boolean value expected.')
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--epochs', type=int, default=500)
+    parser.add_argument('--max_time', type=int, default=10)
+    parser.add_argument('--test_steps', type=int, default=10)
+    parser.add_argument('--batch_size', type=int, default=20)
+    parser.add_argument('--data_dir', type=str, default='data/s2s_mitbih_aami')
+    parser.add_argument('--bidirectional', type=str2bool, default=str2bool('False'))
+    # parser.add_argument('--lstm_layers', type=int, default=2)
+    parser.add_argument('--num_units', type=int, default=128)
+    parser.add_argument('--n_oversampling', type=int, default=10000)
+    parser.add_argument('--checkpoint_dir', type=str, default='checkpoints-seq2seq')
+    parser.add_argument('--ckpt_name', type=str, default='seq2seq_mitbih.ckpt')
+    parser.add_argument('--classes', nargs='+', type=chr,
+                        default=['F','N', 'S','V'])
+    args = parser.parse_args()
+    run_program(args)
+def run_program(args):
+    print(args)
+    max_time = args.max_time # 5 3 second best 10# 40 # 100
+    epochs = args.epochs # 300
+    batch_size = args.batch_size # 10
+    num_units = args.num_units
+    bidirectional = args.bidirectional
+    # lstm_layers = args.lstm_layers
+    n_oversampling = args.n_oversampling
+    checkpoint_dir = args.checkpoint_dir
+    ckpt_name = args.ckpt_name
+    test_steps = args.test_steps
+    classes= args.classes
+    filename = args.data_dir
+    X, Y = read_mitbih(filename,max_time,classes=classes,max_nlabel=100000) #11000
+    print(("# of sequences: ", len(X)))
+    input_depth = X.shape[2]
+    n_channels = 10
+    classes = np.unique(Y)
+    char2numY = dict(list(zip(classes, list(range(len(classes))))))
+    n_classes = len(classes)
+    print(('Classes: ', classes))
+    for cl in classes:
+        ind = np.where(classes == cl)[0][0]
+        print((cl, len(np.where(Y.flatten()==cl)[0])))
+    # char2numX['<PAD>'] = len(char2numX)
+    # num2charX = dict(zip(char2numX.values(), char2numX.keys()))
+    # max_len = max([len(date) for date in x])
+    #
+    # x = [[char2numX['<PAD>']]*(max_len - len(date)) +[char2numX[x_] for x_ in date] for date in x]
+    # print(''.join([num2charX[x_] for x_ in x[4]]))
+    # x = np.array(x)
+    char2numY['<GO>'] = len(char2numY)
+    num2charY = dict(list(zip(list(char2numY.values()), list(char2numY.keys()))))
+    Y = [[char2numY['<GO>']] + [char2numY[y_] for y_ in date] for date in Y]
+    Y = np.array(Y)
+    x_seq_length = len(X[0])
+    y_seq_length = len(Y[0])- 1
+    # Placeholders
+    tf.compat.v1.disable_eager_execution()
+    inputs = tf.compat.v1.placeholder(tf.float32, [None, max_time, input_depth], name = 'inputs')
+    targets = tf.compat.v1.placeholder(tf.int32, (None, None), 'targets')
+    dec_inputs = tf.compat.v1.placeholder(tf.int32, (None, None), 'output')
+    # logits = build_network(inputs,dec_inputs=dec_inputs)
+    logits = build_network(inputs, dec_inputs, char2numY, n_channels=n_channels, input_depth=input_depth, num_units=num_units, max_time=max_time,
+                  bidirectional=bidirectional)
+    # decoder_prediction = tf.argmax(logits, 2)
+    # confusion = tf.confusion_matrix(labels=tf.argmax(targets, 1), predictions=tf.argmax(logits, 2), num_classes=len(char2numY) - 1)# it is wrong
+    # mean_accuracy,update_mean_accuracy = tf.metrics.mean_per_class_accuracy(labels=targets, predictions=decoder_prediction, num_classes=len(char2numY) - 1)
+    with tf.compat.v1.name_scope("optimization"):
+        # Loss function
+        vars = tf.compat.v1.trainable_variables()
+        beta = 0.001
+        lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in vars
+                            if 'bias' not in v.name]) * beta
+        loss = tfa.seq2seq.sequence_loss(logits, targets, tf.ones([batch_size, y_seq_length]))
+        # Optimizer
+        loss = tf.reduce_mean(input_tensor=loss + lossL2)
+        optimizer = tf.compat.v1.train.RMSPropOptimizer(1e-3).minimize(loss)
+    # split the dataset into the training and test sets
+    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
+    # over-sampling: SMOTE
+    X_train = np.reshape(X_train,[X_train.shape[0]*X_train.shape[1],-1])
+    y_train= y_train[:,1:].flatten()
+    nums = []
+    for cl in classes:
+        ind = np.where(classes == cl)[0][0]
+        nums.append(len(np.where(y_train.flatten()==ind)[0]))
+    # ratio={0:nums[3],1:nums[1],2:nums[3],3:nums[3]} # the best with 11000 for N
+    ratio={0:n_oversampling,1:nums[1],2:n_oversampling,3:n_oversampling}
+    sm = SMOTE(random_state=12,ratio=ratio)
+    X_train, y_train = sm.fit_sample(X_train, y_train)
+    X_train = X_train[:int(X_train.shape[0]/max_time)*max_time,:]
+    y_train = y_train[:int(X_train.shape[0]/max_time)*max_time]
+    X_train = np.reshape(X_train,[-1,X_test.shape[1],X_test.shape[2]])
+    y_train = np.reshape(y_train,[-1,y_test.shape[1]-1,])
+    y_train= [[char2numY['<GO>']] + [y_ for y_ in date] for date in y_train]
+    y_train = np.array(y_train)
+    print(('Classes in the training set: ', classes))
+    for cl in classes:
+        ind = np.where(classes == cl)[0][0]
+        print((cl, len(np.where(y_train.flatten()==ind)[0])))
+    print ("------------------y_train samples--------------------")
+    for ii in range(2):
+      print((''.join([num2charY[y_] for y_ in list(y_train[ii+5])])))
+    print ("------------------y_test samples--------------------")
+    for ii in range(2):
+      print((''.join([num2charY[y_] for y_ in list(y_test[ii+5])])))
+    def test_model():
+        # source_batch, target_batch = next(batch_data(X_test, y_test, batch_size))
+        acc_track = []
+        sum_test_conf = []
+        for batch_i, (source_batch, target_batch) in enumerate(batch_data(X_test, y_test, batch_size)):
+            dec_input = np.zeros((len(source_batch), 1)) + char2numY['<GO>']
+            for i in range(y_seq_length):
+                batch_logits = sess.run(logits,
+                                        feed_dict={inputs: source_batch, dec_inputs: dec_input})
+                prediction = batch_logits[:, -1].argmax(axis=-1)
+                dec_input = np.hstack([dec_input, prediction[:, None]])
+            # acc_track.append(np.mean(dec_input == target_batch))
+            acc_track.append(dec_input[:, 1:] == target_batch[:, 1:])
+            y_true= target_batch[:, 1:].flatten()
+            y_pred = dec_input[:, 1:].flatten()
+            sum_test_conf.append(confusion_matrix(y_true, y_pred,labels=list(range(len(char2numY)-1))))
+        sum_test_conf= np.mean(np.array(sum_test_conf, dtype=np.float32), axis=0)
+        # print('Accuracy on test set is: {:>6.4f}'.format(np.mean(acc_track)))
+        # mean_p_class, accuracy_classes = sess.run([mean_accuracy, update_mean_accuracy],
+        #                                           feed_dict={inputs: source_batch,
+        #                                                      dec_inputs: dec_input[:, :-1],
+        #                                                      targets: target_batch[:, 1:]})
+        # print (mean_p_class)
+        # print (accuracy_classes)
+        acc_avg, acc, sensitivity, specificity, PPV = evaluate_metrics(sum_test_conf)
+        print(('Average Accuracy is: {:>6.4f} on test set'.format(acc_avg)))
+        for index_ in range(n_classes):
+            print(("\t{} rhythm -> Sensitivity: {:1.4f}, Specificity : {:1.4f}, Precision (PPV) : {:1.4f}, Accuracy : {:1.4f}".format(classes[index_],
+                                                                                                          sensitivity[
+                                                                                                              index_],
+                                                                                                          specificity[
+                                                                                                              index_],PPV[index_],
+                                                                                                          acc[index_])))
+        print(("\t Average -> Sensitivity: {:1.4f}, Specificity : {:1.4f}, Precision (PPV) : {:1.4f}, Accuracy : {:1.4f}".format(np.mean(sensitivity),np.mean(specificity),np.mean(PPV),np.mean(acc))))
+        return acc_avg, acc, sensitivity, specificity, PPV
+    loss_track = []
+    def count_prameters():
+        print(('# of Params: ', np.sum([np.prod(v.get_shape().as_list()) for v in tf.compat.v1.trainable_variables()])))
+    count_prameters()
+    if (os.path.exists(checkpoint_dir) == False):
+        os.mkdir(checkpoint_dir)
+    # train the graph
+    with tf.compat.v1.Session() as sess:
+        sess.run(tf.compat.v1.global_variables_initializer())
+        sess.run(tf.compat.v1.local_variables_initializer())
+        saver = tf.compat.v1.train.Saver()
+        print((str(datetime.now())))
+        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
+        pre_acc_avg = 0.0
+        if ckpt and ckpt.model_checkpoint_path:
+            # # Restore
+            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
+            # saver.restore(session, os.path.join(checkpoint_dir, ckpt_name))
+            saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir))
+            # or 'load meta graph' and restore weights
+            # saver = tf.train.import_meta_graph(ckpt_name+".meta")
+            # saver.restore(session,tf.train.latest_checkpoint(checkpoint_dir))
+            test_model()
+        else:
+            for epoch_i in range(epochs):
+                start_time = time.time()
+                train_acc = []
+                for batch_i, (source_batch, target_batch) in enumerate(batch_data(X_train, y_train, batch_size)):
+                    _, batch_loss, batch_logits = sess.run([optimizer, loss, logits],
+                        feed_dict = {inputs: source_batch,
+                                     dec_inputs: target_batch[:, :-1],
+                                     targets: target_batch[:, 1:]})
+                    loss_track.append(batch_loss)
+                    train_acc.append(batch_logits.argmax(axis=-1) == target_batch[:,1:])
+                # mean_p_class,accuracy_classes = sess.run([mean_accuracy,update_mean_accuracy],
+                #                         feed_dict={inputs: source_batch,
+                #                                               dec_inputs: target_batch[:, :-1],
+                #                                               targets: target_batch[:, 1:]})
+                # accuracy = np.mean(batch_logits.argmax(axis=-1) == target_batch[:,1:])
+                accuracy = np.mean(train_acc)
+                print(('Epoch {:3} Loss: {:>6.3f} Accuracy: {:>6.4f} Epoch duration: {:>6.3f}s'.format(epoch_i, batch_loss,
+                                                                                  accuracy, time.time() - start_time)))
+                if epoch_i%test_steps==0:
+                    acc_avg, acc, sensitivity, specificity, PPV= test_model()
+                    print(('loss {:.4f} after {} epochs (batch_size={})'.format(loss_track[-1], epoch_i + 1, batch_size)))
+                    save_path = os.path.join(checkpoint_dir, ckpt_name)
+                    saver.save(sess, save_path)
+                    print(("Model saved in path: %s" % save_path))
+                    # if np.nan_to_num(acc_avg) > pre_acc_avg:  # save the better model based on the f1 score
+                    #     print('loss {:.4f} after {} epochs (batch_size={})'.format(loss_track[-1], epoch_i + 1, batch_size))
+                    #     pre_acc_avg = acc_avg
+                    #     save_path =os.path.join(checkpoint_dir, ckpt_name)
+                    #     saver.save(sess, save_path)
+                    #     print("The best model (till now) saved in path: %s" % save_path)
+            plt.plot(loss_track)
+            plt.show()
+        print((str(datetime.now())))
+        # test_model()
+if __name__ == '__main__':
+    main()