import os
import time
import numpy as np
import pandas as pd
import sklearn as sk
import sklearn.metrics  # explicitly import the submodule so the sk.metrics.* calls below resolve
from sklearn.preprocessing import label_binarize
from util import util
from util import metrics
from torch.utils.tensorboard import SummaryWriter
class Visualizer:
"""
This class print/save logging information
"""
def __init__(self, param):
"""
Initialize the Visualizer class
"""
self.param = param
self.output_path = os.path.join(param.checkpoints_dir, param.experiment_name)
tb_dir = os.path.join(self.output_path, 'tb_log')
util.mkdir(tb_dir)
if param.isTrain:
# Create a logging file to store training losses
self.train_log_filename = os.path.join(self.output_path, 'train_log.txt')
with open(self.train_log_filename, 'a') as log_file:
now = time.strftime('%c')
log_file.write('----------------------- Training Log ({:s}) -----------------------\n'.format(now))
self.train_summary_filename = os.path.join(self.output_path, 'train_summary.txt')
with open(self.train_summary_filename, 'a') as log_file:
now = time.strftime('%c')
log_file.write('----------------------- Training Summary ({:s}) -----------------------\n'.format(now))
# Create log folder for TensorBoard
tb_train_dir = os.path.join(self.output_path, 'tb_log', 'train')
util.mkdir(tb_train_dir)
util.clear_dir(tb_train_dir)
# Create TensorBoard writer
self.train_writer = SummaryWriter(log_dir=tb_train_dir)
if param.isTest:
# Create a logging file to store testing metrics
self.test_log_filename = os.path.join(self.output_path, 'test_log.txt')
with open(self.test_log_filename, 'a') as log_file:
now = time.strftime('%c')
log_file.write('----------------------- Testing Log ({:s}) -----------------------\n'.format(now))
self.test_summary_filename = os.path.join(self.output_path, 'test_summary.txt')
with open(self.test_summary_filename, 'a') as log_file:
now = time.strftime('%c')
log_file.write('----------------------- Testing Summary ({:s}) -----------------------\n'.format(now))
# Create log folder for TensorBoard
tb_test_dir = os.path.join(self.output_path, 'tb_log', 'test')
util.mkdir(tb_test_dir)
util.clear_dir(tb_test_dir)
# Create TensorBoard writer
self.test_writer = SummaryWriter(log_dir=tb_test_dir)
def print_train_log(self, epoch, iteration, losses_dict, metrics_dict, load_time, comp_time, batch_size, dataset_size, with_time=True):
"""
        Print the training log to the console and save the message to disk
Parameters:
epoch (int) -- current epoch
iteration (int) -- current training iteration during this epoch
losses_dict (OrderedDict) -- training losses stored in the ordered dict
metrics_dict (OrderedDict) -- metrics stored in the ordered dict
load_time (float) -- data loading time per data point (normalized by batch_size)
comp_time (float) -- computational time per data point (normalized by batch_size)
batch_size (int) -- batch size of training
dataset_size (int) -- size of the training dataset
with_time (bool) -- print the running time or not
"""
data_point_covered = min((iteration + 1) * batch_size, dataset_size)
if with_time:
message = '[TRAIN] [Epoch: {:3d} Iter: {:4d} Load_t: {:.3f} Comp_t: {:.3f}] '.format(epoch, data_point_covered, load_time, comp_time)
else:
            message = '[TRAIN] [Epoch: {:3d} Iter: {:4d}] '.format(epoch, data_point_covered)
for name, loss in losses_dict.items():
message += '{:s}: {:.3f} '.format(name, loss[-1])
for name, metric in metrics_dict.items():
message += '{:s}: {:.3f} '.format(name, metric)
print(message) # print the message
with open(self.train_log_filename, 'a') as log_file:
log_file.write(message + '\n') # save the message
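    # Example call sketch (assumed shapes: each loss maps to a list of
    # per-iteration values, each metric to a scalar; names are illustrative):
    #
    #   from collections import OrderedDict
    #   losses = OrderedDict(recon=[0.52, 0.48], kl=[0.10, 0.09])
    #   metrics = OrderedDict(accuracy=0.83)
    #   visualizer.print_train_log(epoch=1, iteration=0, losses_dict=losses,
    #                              metrics_dict=metrics, load_time=0.01,
    #                              comp_time=0.05, batch_size=32, dataset_size=1000)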
def print_train_summary(self, epoch, losses_dict, output_dict, train_time, current_lr):
"""
        Print the summary of this training epoch
Parameters:
epoch (int) -- epoch number of this training model
losses_dict (OrderedDict) -- the losses dictionary
output_dict (OrderedDict) -- the downstream output dictionary
train_time (float) -- time used for training this epoch
current_lr (float) -- the learning rate of this epoch
"""
write_message = '{:s}\t'.format(str(epoch))
print_message = '[TRAIN] [Epoch: {:3d}]\n'.format(int(epoch))
        for name, loss in losses_dict.items():
            mean_loss = np.mean(loss)  # average the per-iteration losses over the epoch
            write_message += '{:.6f}\t'.format(mean_loss)
            print_message += name + ': {:.3f} '.format(mean_loss)
            self.train_writer.add_scalar('loss_' + name, mean_loss, epoch)
metrics_dict = self.get_epoch_metrics(output_dict)
for name, metric in metrics_dict.items():
write_message += '{:.6f}\t'.format(metric)
print_message += name + ': {:.3f} '.format(metric)
self.train_writer.add_scalar('metric_'+name, metric, epoch)
train_time_msg = 'Training time used: {:.3f}s'.format(train_time)
print_message += '\n' + train_time_msg
with open(self.train_log_filename, 'a') as log_file:
log_file.write(train_time_msg + '\n')
current_lr_msg = 'Learning rate for this epoch: {:.7f}'.format(current_lr)
print_message += '\n' + current_lr_msg
self.train_writer.add_scalar('lr', current_lr, epoch)
with open(self.train_summary_filename, 'a') as log_file:
log_file.write(write_message + '\n')
print(print_message)
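    # Each call appends one tab-separated row to train_summary.txt: the epoch
    # number, then the mean of every loss, then every epoch metric, all to six
    # decimal places and in dictionary insertion order, e.g.
    #
    #   12    0.412300    0.103800    0.871000    ...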
def print_test_log(self, epoch, iteration, losses_dict, metrics_dict, batch_size, dataset_size):
"""
        Print performance metrics of this iteration to the console and save the message to disk
Parameters:
epoch (int) -- epoch number of this testing model
iteration (int) -- current testing iteration during this epoch
            losses_dict (OrderedDict) -- testing losses stored in the ordered dict
metrics_dict (OrderedDict) -- metrics stored in the ordered dict
batch_size (int) -- batch size of testing
dataset_size (int) -- size of the testing dataset
"""
data_point_covered = min((iteration + 1) * batch_size, dataset_size)
message = '[TEST] [Epoch: {:3d} Iter: {:4d}] '.format(int(epoch), data_point_covered)
for name, loss in losses_dict.items():
message += '{:s}: {:.3f} '.format(name, loss[-1])
for name, metric in metrics_dict.items():
message += '{:s}: {:.3f} '.format(name, metric)
print(message)
with open(self.test_log_filename, 'a') as log_file:
log_file.write(message + '\n')
def print_test_summary(self, epoch, losses_dict, output_dict, test_time):
"""
        Print the summary of this testing epoch
Parameters:
epoch (int) -- epoch number of this testing model
losses_dict (OrderedDict) -- the losses dictionary
output_dict (OrderedDict) -- the downstream output dictionary
test_time (float) -- time used for testing this epoch
"""
write_message = '{:s}\t'.format(str(epoch))
print_message = '[TEST] [Epoch: {:3d}] '.format(int(epoch))
        for name, loss in losses_dict.items():
            mean_loss = np.mean(loss)
            # write_message += '{:.6f}\t'.format(mean_loss)
            print_message += name + ': {:.3f} '.format(mean_loss)
            self.test_writer.add_scalar('loss_' + name, mean_loss, epoch)
metrics_dict = self.get_epoch_metrics(output_dict)
for name, metric in metrics_dict.items():
write_message += '{:.6f}\t'.format(metric)
print_message += name + ': {:.3f} '.format(metric)
self.test_writer.add_scalar('metric_' + name, metric, epoch)
with open(self.test_summary_filename, 'a') as log_file:
log_file.write(write_message + '\n')
test_time_msg = 'Testing time used: {:.3f}s'.format(test_time)
print_message += '\n' + test_time_msg
print(print_message)
with open(self.test_log_filename, 'a') as log_file:
log_file.write(test_time_msg + '\n')
def get_epoch_metrics(self, output_dict):
"""
        Get the downstream task metrics for the whole epoch
Parameters:
output_dict (OrderedDict) -- the output dictionary used to compute the downstream task metrics
"""
if self.param.downstream_task == 'classification':
y_true = output_dict['y_true'].cpu().numpy()
y_true_binary = label_binarize(y_true, classes=range(self.param.class_num))
y_pred = output_dict['y_pred'].cpu().numpy()
y_prob = output_dict['y_prob'].cpu().numpy()
if self.param.class_num == 2:
y_prob = y_prob[:, 1]
accuracy = sk.metrics.accuracy_score(y_true, y_pred)
precision = sk.metrics.precision_score(y_true, y_pred, average='macro', zero_division=0)
recall = sk.metrics.recall_score(y_true, y_pred, average='macro', zero_division=0)
f1 = sk.metrics.f1_score(y_true, y_pred, average='macro', zero_division=0)
try:
auc = sk.metrics.roc_auc_score(y_true_binary, y_prob, multi_class='ovo', average='macro')
except ValueError:
auc = -1
print('ValueError: ROC AUC score is not defined in this case.')
return {'accuracy': accuracy, 'precision': precision, 'recall': recall, 'f1': f1, 'auc': auc}
elif self.param.downstream_task == 'regression':
y_true = output_dict['y_true'].cpu().numpy()
y_pred = output_dict['y_pred'].cpu().detach().numpy()
            mse = sk.metrics.mean_squared_error(y_true, y_pred)
            rmse = np.sqrt(mse)  # version-agnostic RMSE (avoids the 'squared' kwarg removed in newer scikit-learn)
mae = sk.metrics.mean_absolute_error(y_true, y_pred)
medae = sk.metrics.median_absolute_error(y_true, y_pred)
r2 = sk.metrics.r2_score(y_true, y_pred)
return {'mse': mse, 'rmse': rmse, 'mae': mae, 'medae': medae, 'r2': r2}
elif self.param.downstream_task == 'survival':
metrics_start_time = time.time()
y_true_E = output_dict['y_true_E'].cpu().numpy()
y_true_T = output_dict['y_true_T'].cpu().numpy()
y_pred_risk = output_dict['risk'].cpu().numpy()
y_pred_survival = output_dict['survival'].cpu().numpy()
time_points = util.get_time_points(self.param.survival_T_max, self.param.time_num)
try:
c_index = metrics.c_index(y_true_T, y_true_E, y_pred_risk)
except ValueError:
c_index = -1
print('ValueError: NaNs detected in input when calculating c-index.')
try:
ibs = metrics.ibs(y_true_T, y_true_E, y_pred_survival, time_points)
except ValueError:
ibs = -1
print('ValueError: NaNs detected in input when calculating integrated brier score.')
metrics_time = time.time() - metrics_start_time
print('Metrics computing time: {:.3f}s'.format(metrics_time))
return {'c-index': c_index, 'ibs': ibs}
elif self.param.downstream_task == 'multitask':
metrics_start_time = time.time()
# Survival
y_true_E = output_dict['y_true_E'].cpu().numpy()
y_true_T = output_dict['y_true_T'].cpu().numpy()
y_pred_risk = output_dict['risk'].cpu().numpy()
y_pred_survival = output_dict['survival'].cpu().numpy()
time_points = util.get_time_points(self.param.survival_T_max, self.param.time_num)
try:
c_index = metrics.c_index(y_true_T, y_true_E, y_pred_risk)
except ValueError:
c_index = -1
print('ValueError: NaNs detected in input when calculating c-index.')
try:
ibs = metrics.ibs(y_true_T, y_true_E, y_pred_survival, time_points)
except ValueError:
ibs = -1
print('ValueError: NaNs detected in input when calculating integrated brier score.')
# Classification
y_true_cla = output_dict['y_true_cla'].cpu().numpy()
y_true_cla_binary = label_binarize(y_true_cla, classes=range(self.param.class_num))
y_pred_cla = output_dict['y_pred_cla'].cpu().numpy()
y_prob_cla = output_dict['y_prob_cla'].cpu().numpy()
if self.param.class_num == 2:
y_prob_cla = y_prob_cla[:, 1]
accuracy = sk.metrics.accuracy_score(y_true_cla, y_pred_cla)
precision = sk.metrics.precision_score(y_true_cla, y_pred_cla, average='macro', zero_division=0)
recall = sk.metrics.recall_score(y_true_cla, y_pred_cla, average='macro', zero_division=0)
f1 = sk.metrics.f1_score(y_true_cla, y_pred_cla, average='macro', zero_division=0)
            # AUC is currently disabled for the multitask setting:
            # try:
            #     auc = sk.metrics.roc_auc_score(y_true_cla_binary, y_prob_cla, multi_class='ovo', average='macro')
            # except ValueError:
            #     auc = -1
            #     print('ValueError: ROC AUC score is not defined in this case.')
# Regression
y_true_reg = output_dict['y_true_reg'].cpu().numpy()
y_pred_reg = output_dict['y_pred_reg'].cpu().detach().numpy()
            # mse = sk.metrics.mean_squared_error(y_true_reg, y_pred_reg)
            rmse = np.sqrt(sk.metrics.mean_squared_error(y_true_reg, y_pred_reg))  # version-agnostic RMSE
mae = sk.metrics.mean_absolute_error(y_true_reg, y_pred_reg)
medae = sk.metrics.median_absolute_error(y_true_reg, y_pred_reg)
r2 = sk.metrics.r2_score(y_true_reg, y_pred_reg)
metrics_time = time.time() - metrics_start_time
print('Metrics computing time: {:.3f}s'.format(metrics_time))
return {'c-index': c_index, 'ibs': ibs, 'accuracy': accuracy, 'precision': precision, 'recall': recall, 'f1': f1, 'rmse': rmse, 'mae': mae, 'medae': medae, 'r2': r2}
elif self.param.downstream_task == 'alltask':
metrics_start_time = time.time()
# Survival
y_true_E = output_dict['y_true_E'].cpu().numpy()
y_true_T = output_dict['y_true_T'].cpu().numpy()
y_pred_risk = output_dict['risk'].cpu().numpy()
y_pred_survival = output_dict['survival'].cpu().numpy()
time_points = util.get_time_points(self.param.survival_T_max, self.param.time_num)
try:
c_index = metrics.c_index(y_true_T, y_true_E, y_pred_risk)
except ValueError:
c_index = -1
print('ValueError: NaNs detected in input when calculating c-index.')
try:
ibs = metrics.ibs(y_true_T, y_true_E, y_pred_survival, time_points)
except ValueError:
ibs = -1
print('ValueError: NaNs detected in input when calculating integrated brier score.')
# Classification
accuracy = []
f1 = []
auc = []
for i in range(self.param.task_num - 2):
y_true_cla = output_dict['y_true_cla'][i].cpu().numpy()
y_true_cla_binary = label_binarize(y_true_cla, classes=range(self.param.class_num[i]))
y_pred_cla = output_dict['y_pred_cla'][i].cpu().numpy()
y_prob_cla = output_dict['y_prob_cla'][i].cpu().numpy()
if self.param.class_num[i] == 2:
y_prob_cla = y_prob_cla[:, 1]
accuracy.append(sk.metrics.accuracy_score(y_true_cla, y_pred_cla))
f1.append(sk.metrics.f1_score(y_true_cla, y_pred_cla, average='macro', zero_division=0))
try:
auc.append(sk.metrics.roc_auc_score(y_true_cla_binary, y_prob_cla, multi_class='ovo', average='macro'))
except ValueError:
auc.append(-1)
print('ValueError: ROC AUC score is not defined in this case.')
# Regression
y_true_reg = output_dict['y_true_reg'].cpu().numpy()
y_pred_reg = output_dict['y_pred_reg'].cpu().detach().numpy()
            # mse = sk.metrics.mean_squared_error(y_true_reg, y_pred_reg)
            rmse = np.sqrt(sk.metrics.mean_squared_error(y_true_reg, y_pred_reg))  # version-agnostic RMSE
# mae = sk.metrics.mean_absolute_error(y_true_reg, y_pred_reg)
# medae = sk.metrics.median_absolute_error(y_true_reg, y_pred_reg)
r2 = sk.metrics.r2_score(y_true_reg, y_pred_reg)
metrics_time = time.time() - metrics_start_time
print('Metrics computing time: {:.3f}s'.format(metrics_time))
            # Assemble results for however many classification tasks were run,
            # instead of hard-coding exactly five
            results = {'c-index': c_index, 'ibs': ibs}
            for i in range(self.param.task_num - 2):
                results['accuracy_{:d}'.format(i + 1)] = accuracy[i]
                results['f1_{:d}'.format(i + 1)] = f1[i]
                results['auc_{:d}'.format(i + 1)] = auc[i]
            results.update({'rmse': rmse, 'r2': r2})
            return results
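    # Sketch of the 'output_dict' expected for the classification task (key names
    # follow the reads above; shapes and values are assumptions):
    #
    #   output_dict = OrderedDict(y_true=torch.tensor([0, 1, 1]),      # (N,) labels
    #                             y_pred=torch.tensor([0, 1, 0]),      # (N,) predictions
    #                             y_prob=torch.tensor([[0.9, 0.1],
    #                                                  [0.2, 0.8],
    #                                                  [0.6, 0.4]]))   # (N, class_num)
    #   epoch_metrics = visualizer.get_epoch_metrics(output_dict)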
def save_output_dict(self, output_dict):
"""
Save the downstream task output to disk
Parameters:
output_dict (OrderedDict) -- the downstream task output dictionary to be saved
"""
down_path = os.path.join(self.output_path, 'down_output')
util.mkdir(down_path)
if self.param.downstream_task == 'classification':
# Prepare files
index = output_dict['index'].numpy()
y_true = output_dict['y_true'].cpu().numpy()
y_pred = output_dict['y_pred'].cpu().numpy()
y_prob = output_dict['y_prob'].cpu().numpy()
sample_list = self.param.sample_list[index]
# Output files
y_df = pd.DataFrame({'sample': sample_list, 'y_true': y_true, 'y_pred': y_pred}, index=index)
y_df_path = os.path.join(down_path, 'y_df.tsv')
y_df.to_csv(y_df_path, sep='\t')
prob_df = pd.DataFrame(y_prob, columns=range(self.param.class_num), index=sample_list)
y_prob_path = os.path.join(down_path, 'y_prob.tsv')
prob_df.to_csv(y_prob_path, sep='\t')
elif self.param.downstream_task == 'regression':
# Prepare files
index = output_dict['index'].numpy()
y_true = output_dict['y_true'].cpu().numpy()
y_pred = np.squeeze(output_dict['y_pred'].cpu().detach().numpy())
sample_list = self.param.sample_list[index]
# Output files
y_df = pd.DataFrame({'sample': sample_list, 'y_true': y_true, 'y_pred': y_pred}, index=index)
y_df_path = os.path.join(down_path, 'y_df.tsv')
y_df.to_csv(y_df_path, sep='\t')
elif self.param.downstream_task == 'survival':
# Prepare files
index = output_dict['index'].numpy()
y_true_E = output_dict['y_true_E'].cpu().numpy()
y_true_T = output_dict['y_true_T'].cpu().numpy()
y_pred_risk = output_dict['risk'].cpu().numpy()
survival_function = output_dict['survival'].cpu().numpy()
y_out = output_dict['y_out'].cpu().numpy()
sample_list = self.param.sample_list[index]
time_points = util.get_time_points(self.param.survival_T_max, self.param.time_num)
# Output files
y_df = pd.DataFrame({'sample': sample_list, 'true_T': y_true_T, 'true_E': y_true_E, 'pred_risk': y_pred_risk}, index=index)
y_df_path = os.path.join(down_path, 'y_df.tsv')
y_df.to_csv(y_df_path, sep='\t')
survival_function_df = pd.DataFrame(survival_function, columns=time_points, index=sample_list)
survival_function_path = os.path.join(down_path, 'survival_function.tsv')
survival_function_df.to_csv(survival_function_path, sep='\t')
y_out_df = pd.DataFrame(y_out, index=sample_list)
y_out_path = os.path.join(down_path, 'y_out.tsv')
y_out_df.to_csv(y_out_path, sep='\t')
elif self.param.downstream_task == 'multitask':
# Survival
index = output_dict['index'].numpy()
y_true_E = output_dict['y_true_E'].cpu().numpy()
y_true_T = output_dict['y_true_T'].cpu().numpy()
y_pred_risk = output_dict['risk'].cpu().numpy()
survival_function = output_dict['survival'].cpu().numpy()
y_out_sur = output_dict['y_out_sur'].cpu().numpy()
sample_list = self.param.sample_list[index]
time_points = util.get_time_points(self.param.survival_T_max, self.param.time_num)
y_df_sur = pd.DataFrame(
{'sample': sample_list, 'true_T': y_true_T, 'true_E': y_true_E, 'pred_risk': y_pred_risk}, index=index)
y_df_sur_path = os.path.join(down_path, 'y_df_survival.tsv')
y_df_sur.to_csv(y_df_sur_path, sep='\t')
survival_function_df = pd.DataFrame(survival_function, columns=time_points, index=sample_list)
survival_function_path = os.path.join(down_path, 'survival_function.tsv')
survival_function_df.to_csv(survival_function_path, sep='\t')
y_out_sur_df = pd.DataFrame(y_out_sur, index=sample_list)
y_out_sur_path = os.path.join(down_path, 'y_out_survival.tsv')
y_out_sur_df.to_csv(y_out_sur_path, sep='\t')
# Classification
y_true_cla = output_dict['y_true_cla'].cpu().numpy()
y_pred_cla = output_dict['y_pred_cla'].cpu().numpy()
y_prob_cla = output_dict['y_prob_cla'].cpu().numpy()
y_df_cla = pd.DataFrame({'sample': sample_list, 'y_true': y_true_cla, 'y_pred': y_pred_cla}, index=index)
y_df_cla_path = os.path.join(down_path, 'y_df_classification.tsv')
y_df_cla.to_csv(y_df_cla_path, sep='\t')
prob_cla_df = pd.DataFrame(y_prob_cla, columns=range(self.param.class_num), index=sample_list)
y_prob_cla_path = os.path.join(down_path, 'y_prob_classification.tsv')
prob_cla_df.to_csv(y_prob_cla_path, sep='\t')
# Regression
y_true_reg = output_dict['y_true_reg'].cpu().numpy()
y_pred_reg = np.squeeze(output_dict['y_pred_reg'].cpu().detach().numpy())
y_df_reg = pd.DataFrame({'sample': sample_list, 'y_true': y_true_reg, 'y_pred': y_pred_reg}, index=index)
y_df_reg_path = os.path.join(down_path, 'y_df_regression.tsv')
y_df_reg.to_csv(y_df_reg_path, sep='\t')
elif self.param.downstream_task == 'alltask':
# Survival
index = output_dict['index'].numpy()
y_true_E = output_dict['y_true_E'].cpu().numpy()
y_true_T = output_dict['y_true_T'].cpu().numpy()
y_pred_risk = output_dict['risk'].cpu().numpy()
survival_function = output_dict['survival'].cpu().numpy()
y_out_sur = output_dict['y_out_sur'].cpu().numpy()
sample_list = self.param.sample_list[index]
time_points = util.get_time_points(self.param.survival_T_max, self.param.time_num)
y_df_sur = pd.DataFrame(
{'sample': sample_list, 'true_T': y_true_T, 'true_E': y_true_E, 'pred_risk': y_pred_risk}, index=index)
y_df_sur_path = os.path.join(down_path, 'y_df_survival.tsv')
y_df_sur.to_csv(y_df_sur_path, sep='\t')
survival_function_df = pd.DataFrame(survival_function, columns=time_points, index=sample_list)
survival_function_path = os.path.join(down_path, 'survival_function.tsv')
survival_function_df.to_csv(survival_function_path, sep='\t')
y_out_sur_df = pd.DataFrame(y_out_sur, index=sample_list)
y_out_sur_path = os.path.join(down_path, 'y_out_survival.tsv')
y_out_sur_df.to_csv(y_out_sur_path, sep='\t')
# Classification
for i in range(self.param.task_num - 2):
y_true_cla = output_dict['y_true_cla'][i].cpu().numpy()
y_pred_cla = output_dict['y_pred_cla'][i].cpu().numpy()
y_prob_cla = output_dict['y_prob_cla'][i].cpu().numpy()
y_df_cla = pd.DataFrame({'sample': sample_list, 'y_true': y_true_cla, 'y_pred': y_pred_cla}, index=index)
y_df_cla_path = os.path.join(down_path, 'y_df_classification_'+str(i+1)+'.tsv')
y_df_cla.to_csv(y_df_cla_path, sep='\t')
prob_cla_df = pd.DataFrame(y_prob_cla, columns=range(self.param.class_num[i]), index=sample_list)
y_prob_cla_path = os.path.join(down_path, 'y_prob_classification_'+str(i+1)+'.tsv')
prob_cla_df.to_csv(y_prob_cla_path, sep='\t')
# Regression
y_true_reg = output_dict['y_true_reg'].cpu().numpy()
y_pred_reg = np.squeeze(output_dict['y_pred_reg'].cpu().detach().numpy())
y_df_reg = pd.DataFrame({'sample': sample_list, 'y_true': y_true_reg, 'y_pred': y_pred_reg}, index=index)
y_df_reg_path = os.path.join(down_path, 'y_df_regression.tsv')
y_df_reg.to_csv(y_df_reg_path, sep='\t')
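    # Files written per task (under <output_path>/down_output/):
    #   classification    -> y_df.tsv, y_prob.tsv
    #   regression        -> y_df.tsv
    #   survival          -> y_df.tsv, survival_function.tsv, y_out.tsv
    #   multitask/alltask -> y_df_survival.tsv, survival_function.tsv,
    #                        y_out_survival.tsv, plus classification and
    #                        regression tables (suffixed _1.._N for 'alltask')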
def save_latent_space(self, latent_dict, sample_list):
"""
        Save the latent space matrix to disk
Parameters:
latent_dict (OrderedDict) -- the latent space dictionary
sample_list (ndarray) -- the sample list for the latent matrix
"""
reordered_sample_list = sample_list[latent_dict['index'].astype(int)]
latent_df = pd.DataFrame(latent_dict['latent'], index=reordered_sample_list)
        output_path = os.path.join(self.output_path, 'latent_space.tsv')
print('Saving the latent space matrix...')
latent_df.to_csv(output_path, sep='\t')
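    # Example call sketch (assumed shapes; 'index' records each row's original
    # sample position so the matrix can be re-aligned with sample_list):
    #
    #   latent_dict = OrderedDict(index=np.array([2, 0, 1]),
    #                             latent=np.random.rand(3, 128))   # (N, latent_dim)
    #   visualizer.save_latent_space(latent_dict, sample_list=np.array(['s1', 's2', 's3']))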
@staticmethod
def print_phase(phase):
"""
        Print the phase information
        Parameters:
            phase (str) -- the phase of the training process
"""
if phase == 'p1':
print('PHASE 1: Unsupervised Phase')
elif phase == 'p2':
print('PHASE 2: Supervised Phase')
elif phase == 'p3':
print('PHASE 3: Supervised Phase')