--- a
+++ b/ddc_pub/ddc_v3_unbiased.py
@@ -0,0 +1,1028 @@
+import os
+os.environ[
+    "TF_CPP_MIN_LOG_LEVEL"
+] = "3"  # Suppress UserWarning of TensorFlow while loading the model
+
+import numpy as np
+from datetime import datetime
+from functools import wraps
+import shutil, zipfile, tempfile, pickle
+
+from tensorflow.keras.layers import (
+    Input,
+    Concatenate,
+    Dense,
+    TimeDistributed,
+    BatchNormalization,
+)
+from tensorflow.compat.v1.keras.layers import (
+    CuDNNLSTM as LSTM,
+) 
+from tensorflow.keras.models import Model, load_model
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.callbacks import ReduceLROnPlateau, LearningRateScheduler
+from tensorflow.keras.utils import multi_gpu_model, plot_model
+
+# Custom dependencies
+from molvecgen import SmilesVectorizer
+
+from ddc_pub.generators import SmilesGenerator2
+from ddc_pub.custom_callbacks import ModelAndHistoryCheckpoint, LearningRateSchedule
+
+
def timed(func):
    """
    Timer decorator to benchmark functions.

    Prints the wall-clock duration of each call, then returns the wrapped
    function's result unchanged.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        tstart = datetime.now()
        result = func(*args, **kwargs)
        # BUGFIX: total_seconds() is the full elapsed time. The previous
        # ".microseconds / 1e6" read only the sub-second component of the
        # timedelta, so any call longer than one second was reported wrong.
        elapsed = (datetime.now() - tstart).total_seconds()
        print("Elapsed time: %.3f seconds." % elapsed)
        return result

    return wrapper
+
+
+class DDC:
    def __init__(self, **kwargs):
        """
        Initialize a DDC model in "train", "retrain" or "test" mode; the mode
        is inferred from which kwargs are supplied (see below).

        # Arguments
            kwargs:
                x            : model input                                                  - np.ndarray of np.bytes_ or np.float64
                y            : model output                                                 - np.ndarray of np.bytes_
                model_name   : model filename to load                                       - string
                dataset_info : dataset information including name, maxlen and charset       - hdf5
                noise_std    : standard deviation of the noise layer in the latent space    - float
                lstm_dim     : size of LSTM RNN layers                                      - int
                dec_layers   : number of decoder layers                                     - int
                td_dense_dim : size of TD Dense layers inbetween the LSTM ones
                               to suppress network size                                     - int
                batch_size   : the network's batch size                                     - int
                codelayer_dim: dimensionality of the latent space or number of descriptors  - int


        # Examples of __init__ usage
            To *train* a blank model with encoder (autoencoder):
                model = ddc.DDC(x              = mols,
                                y              = mols,
                                dataset_info   = info,
                                noise_std      = 0.1,
                                lstm_dim       = 256,
                                dec_layers     = 3,
                                td_dense_dim   = 0,
                                batch_size     = 128,
                                codelayer_dim  = 128)

            To *train* a blank model without encoder:
                model = ddc.DDC(x              = descriptors,
                                y              = mols,
                                dataset_info   = info,
                                noise_std      = 0.1,
                                lstm_dim       = 256,
                                dec_layers     = 3,
                                td_dense_dim   = 0,
                                batch_size     = 128)

            To *re-train* a saved model with encoder (autoencoder):
                model = ddc.DDC(x              = mols,
                                y              = mols,
                                model_name     = saved_model_name)

            To *re-train* a saved model without encoder:
                model = ddc.DDC(x              = descriptors,
                                y              = mols,
                                model_name     = saved_model_name)

            To *test* a saved model:
                model = ddc.DDC(model_name     = saved_model_name)

        """

        # Identify the mode to start the model in:
        #   "x" only               -> "train"   (build a fresh model)
        #   "x" and "model_name"   -> "retrain" (continue training a saved model)
        #   "model_name" only      -> "test"    (inference only)
        if "x" in kwargs:
            x = kwargs.get("x")
            if "model_name" not in kwargs:
                self.__mode = "train"
            else:
                self.__mode = "retrain"
        elif "model_name" in kwargs:
            self.__mode = "test"
        else:
            raise NameError("Cannot infer mode from arguments.")

        print("Initializing model in %s mode." % self.__mode)

        if self.mode == "train":
            # Infer input type from type(x)
            if type(x[0]) == np.bytes_:
                print("Input type is 'binary mols'.")
                self.__input_type = "mols"  # binary RDKit mols
            else:
                print("Check input type.")
                self.__input_type = "other"  # other molecular descriptors

            # dataset_info is expected to carry "maxlen", "charset" and "name"
            self.__maxlen = (
                kwargs.get("dataset_info")["maxlen"] + 10
            )  # Extend maxlen to avoid breaks in training
            self.__charset = kwargs.get("dataset_info")["charset"]
            self.__dataset_name = kwargs.get("dataset_info")["name"]
            self.__lstm_dim = kwargs.get("lstm_dim", 256)
            self.__h_activation = kwargs.get("h_activation", "relu")
            self.__bn = kwargs.get("bn", True)
            self.__bn_momentum = kwargs.get("bn_momentum", 0.9)
            self.__noise_std = kwargs.get("noise_std", 0.01)
            self.__td_dense_dim = kwargs.get(
                "td_dense_dim", 0
            )  # >0 squeezes RNN connections with Dense sandwiches
            self.__batch_size = kwargs.get("batch_size", 256)
            self.__dec_layers = kwargs.get("dec_layers", 2)

            # NOTE(review): codelayer_dim is stored but never referenced by the
            # decoder-only __build_model in this file — presumably kept for
            # checkpoint/metadata compatibility with the biased variant; confirm.
            self.__codelayer_dim = kwargs.get("codelayer_dim", 128)

            # Create the left/right-padding vectorizers
            self.__smilesvec1 = SmilesVectorizer(
                canonical=False,
                augment=True,
                maxlength=self.maxlen,
                charset=self.charset,
                binary=True,
            )

            self.__smilesvec2 = SmilesVectorizer(
                canonical=False,
                augment=True,
                maxlength=self.maxlen,
                charset=self.charset,
                binary=True,
                leftpad=False,
            )

            # self.train_gen.next() #This line is needed to set train_gen.dims (to be fixed in HetSmilesGenerator)
            # Derived shapes: decoder consumes sequences one step shorter than
            # the vectorized SMILES (teacher forcing drops the final step).
            self.__input_shape = self.smilesvec1.dims
            self.__dec_dims = list(self.smilesvec1.dims)
            self.__dec_dims[0] = self.dec_dims[0] - 1
            self.__dec_input_shape = self.dec_dims
            self.__output_len = self.smilesvec1.dims[0] - 1
            self.__output_dims = self.smilesvec1.dims[-1]

            # Build data generators
            self.__build_generators(x)

            # Build full model out of the sub-models
            self.__build_model()

        # Retrain or Test mode
        else:
            self.__model_name = kwargs.get("model_name")

            # Load the model (also restores pickled attributes and builds
            # the stateful sampling models)
            self.__load(self.model_name)

            if self.mode == "retrain":
                # Build data generators
                self.__build_generators(x)

        

        # Show the resulting full model
        print(self.model.summary())
+
+    """
+    Architecture properties.
+    """
+
    @property
    def lstm_dim(self):
        """Width (units) of each decoder LSTM layer."""
        return self.__lstm_dim
+
    @property
    def h_activation(self):
        """Hidden-layer activation name (default "relu")."""
        return self.__h_activation
+
    @property
    def bn(self):
        """Whether BatchNormalization layers are inserted after each LSTM."""
        return self.__bn
+
    @property
    def bn_momentum(self):
        """Momentum passed to the BatchNormalization layers."""
        return self.__bn_momentum
+
    @property
    def noise_std(self):
        """Std-dev of the latent-space noise layer (per __init__ kwargs)."""
        return self.__noise_std
+
    @property
    def td_dense_dim(self):
        """TimeDistributed Dense size between LSTM layers; 0 disables them."""
        return self.__td_dense_dim
+
    @property
    def batch_size(self):
        """Training batch size."""
        return self.__batch_size
+
    @property
    def dec_layers(self):
        """Number of decoder LSTM layers."""
        return self.__dec_layers
+
    @property
    def codelayer_dim(self):
        """Latent-space / descriptor dimensionality (stored at __init__)."""
        return self.__codelayer_dim
+
    @property
    def steps_per_epoch(self):
        """Training batches per epoch, set by __build_generators."""
        return self.__steps_per_epoch
+
    @property
    def validation_steps(self):
        """Validation batches per epoch, set by __build_generators."""
        return self.__validation_steps
+
    @property
    def input_shape(self):
        """Dims of a vectorized SMILES (smilesvec1.dims)."""
        return self.__input_shape
+
    @property
    def dec_dims(self):
        """smilesvec1.dims with the sequence length reduced by one."""
        return self.__dec_dims
+
    @property
    def dec_input_shape(self):
        """Input shape of the decoder model (same object as dec_dims)."""
        return self.__dec_input_shape
+
    @property
    def output_len(self):
        """Output sequence length: smilesvec1.dims[0] - 1."""
        return self.__output_len
+
    @property
    def output_dims(self):
        """Charset size (last entry of smilesvec1.dims)."""
        return self.__output_dims
+
    @property
    def batch_input_length(self):
        """Batch size baked into the most recently built sampling model."""
        return self.__batch_input_length
+
+    #@batch_input_length.setter
+    #def batch_input_length(self, value):
+    #    self.__batch_input_length = value
+    #    self.__build_sample_model(batch_input_length=value)
+
+    """
+    Models.
+    """
+
    @property
    def sample_model(self):
        """Stateful single-sequence sampling model (batch size 1)."""
        return self.__sample_model
+
    @property
    def multi_sample_model(self):
        """Stateful batched sampling model (batch size > 1)."""
        return self.__multi_sample_model
+
    @property
    def model(self):
        """The trainable teacher-forcing (batch) Keras model."""
        return self.__model
+
+    """
+    Train properties.
+    """
+
    @property
    def epochs(self):
        """Total epochs requested in the last fit() call."""
        return self.__epochs
+
    @property
    def clipvalue(self):
        """Gradient-clipping value used by fit() (0 disables clipping)."""
        return self.__clipvalue
+
    @property
    def lr(self):
        """Initial learning rate of the last fit() call."""
        return self.__lr
+
    @property
    def h(self):
        """Training history dict from the checkpoint callback."""
        return self.__h
+
+    """
+    Other properties.
+    """
+
    @property
    def mode(self):
        """One of "train", "retrain" or "test" (set in __init__)."""
        return self.__mode
+
    @property
    def dataset_name(self):
        """Name of the training dataset (from dataset_info)."""
        return self.__dataset_name
+
    @property
    def model_name(self):
        """Filename stem the model was loaded from (retrain/test modes)."""
        return self.__model_name
+
    @property
    def input_type(self):
        """"mols" for binary RDKit mols, otherwise "other"."""
        return self.__input_type
+
    @property
    def maxlen(self):
        """Maximum SMILES length (dataset maxlen + 10 padding)."""
        return self.__maxlen
+
    @property
    def charset(self):
        """SMILES character set used by the vectorizers."""
        return self.__charset
+
    @property
    def smilesvec1(self):
        """Left-padding SMILES vectorizer."""
        return self.__smilesvec1
+
    @property
    def smilesvec2(self):
        """Right-padding SMILES vectorizer (leftpad=False)."""
        return self.__smilesvec2
+
    @property
    def train_gen(self):
        """Training-data generator, set by __build_generators."""
        return self.__train_gen
+
    @property
    def valid_gen(self):
        """Validation-data generator, set by __build_generators."""
        return self.__valid_gen
+
+    """
+    Private methods.
+    """
+
+    def __build_generators(self, x, split=0.81050343):
+        """
+            Build data generators to be used in (re)training.
+            """
+
+        # Split dataset into train and validation sets
+        cut = int(split * len(x))
+        x_train = x[:cut]
+        x_valid = x[cut:]
+
+        self.__train_gen = SmilesGenerator2(
+            x_train,
+            None,
+            self.smilesvec1,
+            self.smilesvec2,
+            batch_size=self.batch_size,
+            shuffle=True,
+        )
+
+        self.__valid_gen = SmilesGenerator2(
+            x_valid,
+            None,
+            self.smilesvec1,
+            self.smilesvec2,
+            batch_size=self.batch_size,
+            shuffle=True,
+        )
+
+        # Calculate number of batches per training/validation epoch
+        train_samples = len(x_train)
+        valid_samples = len(x_valid)
+        self.__steps_per_epoch = train_samples // self.batch_size
+        self.__validation_steps = valid_samples // self.batch_size
+
+        print(
+            "Model received %d train samples and %d validation samples."
+            % (train_samples, valid_samples)
+        )
+
    def __build_model(self):
        """
        Build the decoder-only RNN ("batch model") that generates random
        SMILES strings via teacher forcing.

        Architecture: Input -> [LSTM (-> BatchNorm) (-> TD Dense)] repeated
        dec_layers times -> softmax Dense over the charset.

        Layer names ("Decoder_LSTM_<i>", "BN_Decoder_<i>", ...) are part of
        the internal contract: __build_sample_model matches layers by these
        name prefixes when deriving the stateful sampling models.
        """

        # This is the start character padded OHE smiles for teacher forcing
        decoder_inputs = Input(shape=self.dec_input_shape, name="Decoder_Inputs")

        # I/O tensor of the LSTM layers
        x = decoder_inputs

        for dec_layer in range(self.dec_layers):
            # RNN layer
            decoder_lstm = LSTM(
                self.lstm_dim,
                return_sequences=True,
                name="Decoder_LSTM_" + str(dec_layer),
            )

            x = decoder_lstm(x)

            if self.bn:
                x = BatchNormalization(
                    momentum=self.bn_momentum, name="BN_Decoder_" + str(dec_layer)
                )(x)

            # Squeeze LSTM interconnections using Dense layers
            if self.td_dense_dim > 0:
                x = TimeDistributed(
                    Dense(self.td_dense_dim), name="Time_Distributed_" + str(dec_layer)
                )(x)

        # Final Dense layer to return soft labels (probabilities)
        outputs = Dense(self.output_dims, activation="softmax", name="Dense_Decoder")(x)

        # Define the batch_model
        self.__model = Model(inputs=[decoder_inputs], outputs=[outputs])

        # Name it!
        self.__model._name = "model"
+
    def __build_sample_model(self, batch_input_length) -> dict:
        """
        Model that predicts a single OHE character.
        This model is generated from the modified config file of the self.batch_model.

        The batch model's config dict is surgically edited: any
        "Decoder_State_" inputs are removed, the input batch shape is fixed to
        (batch_input_length, 1, charset), and the LSTM layers are made
        stateful. Trained weights are then copied layer-by-layer by name.

        Side effects: sets self.__batch_input_length and stores the rebuilt
        model in self.__sample_model (batch_input_length == 1) or
        self.__multi_sample_model (batch_input_length > 1).

        Returns:
            The dictionary of the configuration.
        """

        self.__batch_input_length = batch_input_length

        # Get the configuration of the batch_model
        config = self.model.get_config()

        # Keep only the "Decoder_Inputs" as single input to the sample_model
        config["input_layers"] = [config["input_layers"][0]]

        # Find decoder states that are used as inputs in batch_model and remove them
        # (none exist in this unbiased, decoder-only model; kept for parity
        # with the biased variant — the loop is then a no-op)
        idx_list = []
        for idx, layer in enumerate(config["layers"]):

            if "Decoder_State_" in layer["name"]:
                idx_list.append(idx)

        # Pop the layer from the layer list
        # Revert indices to avoid re-arranging after deleting elements
        for idx in sorted(idx_list, reverse=True):
            config["layers"].pop(idx)

        # Remove inbound_nodes dependencies of remaining layers on deleted ones
        for layer in config["layers"]:
            idx_list = []

            try:
                for idx, inbound_node in enumerate(layer["inbound_nodes"][0]):
                    if "Decoder_State_" in inbound_node[0]:
                        idx_list.append(idx)
            # Catch the exception for first layer (Decoder_Inputs) that has empty list of inbound_nodes[0]
            # NOTE(review): bare except also hides unexpected config-schema
            # errors (e.g. a Keras version changing the inbound_nodes layout);
            # consider narrowing to IndexError.
            except:
                pass

            # Pop the inbound_nodes from the list
            # Revert indices to avoid re-arranging
            for idx in sorted(idx_list, reverse=True):
                layer["inbound_nodes"][0].pop(idx)

        # Change the batch_shape of input layer: the sampler consumes exactly
        # one character per step for a fixed batch of batch_input_length
        config["layers"][0]["config"]["batch_input_shape"] = (
            batch_input_length,
            1,
            self.dec_input_shape[-1],
        )

        # Finally, change the statefulness of the RNN layers so hidden state
        # carries over between the per-character predict() calls
        for layer in config["layers"]:
            if "Decoder_LSTM_" in layer["name"]:
                layer["config"]["stateful"] = True
                # layer["config"]["return_sequences"] = True

        # Define the sample_model using the modified config file
        sample_model = Model.from_config(config)

        # Copy the trained weights from the trained batch_model to the untrained sample_model
        for layer in sample_model.layers:
            # Get weights from the batch_model
            weights = self.model.get_layer(layer.name).get_weights()
            # Set the weights to the sample_model
            sample_model.get_layer(layer.name).set_weights(weights)

        if batch_input_length == 1:
            self.__sample_model = sample_model

        elif batch_input_length > 1:
            self.__multi_sample_model = sample_model

        return config
+
+    def __load(self, model_name):
+        """
+        Load complete model from a zip file.
+        To be called within __init__.
+        """
+
+        print("Loading model.")
+        tstart = datetime.now()
+
+        # Temporary directory to extract the zipped information
+        with tempfile.TemporaryDirectory() as dirpath:
+
+            # Unzip the directory that contains the saved model(s)
+            with zipfile.ZipFile(model_name + ".zip", "r") as zip_ref:
+                zip_ref.extractall(dirpath)
+
+            # Load metadata
+            metadata = pickle.load(open(dirpath + "/metadata.pickle", "rb"))
+
+            # Re-load metadata
+            self.__dict__.update(metadata)
+
+            # Load the model
+            self.__model = load_model(dirpath + "/model.h5")
+
+            # Build sample_model out of the trained batch_model
+            self.__build_sample_model(batch_input_length=1)  # Single-output model
+            self.__build_sample_model(
+                batch_input_length=256
+            )  # Multi-output model
+
+        print("Loading finished in %i seconds." % ((datetime.now() - tstart).seconds))
+
+    """
+    Public methods.
+    """
+
+    def fit(
+        self,
+        model_name,
+        epochs,
+        lr,
+        mini_epochs,
+        patience,
+        gpus=1,
+        workers=1,
+        use_multiprocessing=False,
+        verbose=2,
+        max_queue_size=10,
+        clipvalue=0,
+        save_period=5,
+        checkpoint_dir="/",
+        lr_decay=False,
+        lr_warmup=False,
+        sch_epoch_to_start=500,
+        sch_last_epoch=999,
+        sch_lr_init=1e-3,
+        sch_lr_final=1e-6,
+    ):
+        """
+        Fit the full model to the training data.
+        Supports multi-gpu training if gpus set to >1.
+        
+        # Arguments
+            kwargs:
+                model_name         : base name for the checkpoints                                       - string
+                epochs             : number of epochs to train in total                                  - int
+                lr                 : initial learning rate of the training                               - float
+                mini_epochs        : number of dividends of an epoch (==1 means no mini_epochs)          - int
+                patience           : minimum consecutive mini_epochs of stagnated learning rate to consider 
+                                     before lowering it                                                  - int
+                gpus               : number of gpus to use for multi-gpu training (==1 means single gpu) - int
+                workers            : number of CPU workers                                               - int
+                use_multiprocessing: flag for Keras multiprocessing                                      - boolean
+                verbose            : verbosity of the training                                           - int
+                max_queue_size     : max size of the generator queue                                     - int
+                clipvalue          : value of gradient clipping                                          - float
+                save_period        : mini_epochs every which to checkpoint the model                     - int
+                checkpoint_dir     : directory to store the checkpoints                                  - string
+                lr_decay           : flag to use exponential decay of learning rate                      - boolean
+                lr_warmup          : flag to use warmup for transfer learning                            - boolean
+        """
+
+        # Get parameter values if specified
+        self.__epochs = epochs
+        self.__lr = lr
+        self.__clipvalue = clipvalue
+
+        # Optimizer
+        if clipvalue > 0:
+            print("Using gradient clipping %.2f." % clipvalue)
+            opt = Adam(lr=self.lr, clipvalue=self.clipvalue)
+
+        else:
+            opt = Adam(lr=self.lr)
+
+        checkpoint_file = (
+            checkpoint_dir + "%s--{epoch:02d}--{val_loss:.4f}--{lr:.7f}" % model_name
+        )
+
+        # If model is untrained, history is blank
+        try:
+            history = self.h
+
+        # Else, append the history
+        except:
+            history = {}
+
+        mhcp = ModelAndHistoryCheckpoint(
+            filepath=checkpoint_file,
+            model_dict=self.__dict__,
+            monitor="val_loss",
+            verbose=1,
+            mode="min",
+            period=save_period,
+            history=history
+        )
+
+        # Training history
+        self.__h = mhcp.history
+
+        if lr_decay:
+            lr_schedule = LearningRateSchedule(
+                epoch_to_start=sch_epoch_to_start,
+                last_epoch=sch_last_epoch,
+                lr_init=sch_lr_init,
+                lr_final=sch_lr_final,
+            )
+
+            lr_scheduler = LearningRateScheduler(
+                schedule=lr_schedule.exp_decay, verbose=1
+            )
+
+            callbacks = [lr_scheduler, mhcp]
+
+        elif lr_warmup:
+            lr_schedule = LearningRateSchedule(
+                epoch_to_start=sch_epoch_to_start,
+                last_epoch=sch_last_epoch,
+                lr_init=sch_lr_init,
+                lr_final=sch_lr_final,
+            )
+
+            lr_scheduler = LearningRateScheduler(
+                schedule=lr_schedule.warmup, verbose=1
+            )
+
+            callbacks = [lr_scheduler, mhcp]
+
+        else:
+            rlr = ReduceLROnPlateau(
+                monitor="val_loss",
+                factor=0.5,
+                patience=patience,
+                min_lr=1e-6,
+                verbose=1,
+                min_delta=1e-4,
+            )
+
+            callbacks = [rlr, mhcp]
+
+        # Inspect training parameters at the start of the training
+        self.summary()
+
+        # Parallel training on multiple GPUs
+        if gpus > 1:
+            parallel_model = multi_gpu_model(self.model, gpus=gpus)
+            parallel_model.compile(loss="categorical_crossentropy", optimizer=opt)
+            # This `fit` call will be distributed on all GPUs.
+            # Each GPU will process (batch_size/gpus) samples per batch.
+            parallel_model.fit_generator(
+                self.train_gen,
+                steps_per_epoch=self.steps_per_epoch / mini_epochs,
+                epochs=mini_epochs * self.epochs,
+                validation_data=self.valid_gen,
+                validation_steps=self.validation_steps / mini_epochs,
+                callbacks=callbacks,
+                max_queue_size=max_queue_size,
+                workers=workers,
+                use_multiprocessing=use_multiprocessing,
+                verbose=verbose,
+            )  # 1 to show progress bar
+
+        elif gpus == 1:
+            self.model.compile(loss="categorical_crossentropy", optimizer=opt)
+            self.model.fit_generator(
+                self.train_gen,
+                steps_per_epoch=self.steps_per_epoch / mini_epochs,
+                epochs=mini_epochs * self.epochs,
+                validation_data=self.valid_gen,
+                validation_steps=self.validation_steps / mini_epochs,
+                callbacks=callbacks,
+                max_queue_size=10,
+                workers=workers,
+                use_multiprocessing=use_multiprocessing,
+                verbose=verbose,
+            )  # 1 to show progress bar
+
+        # Build sample_model out of the trained batch_model
+        self.__build_sample_model(batch_input_length=1)  # Single-output model
+        self.__build_sample_model(
+            batch_input_length=self.batch_size
+        )  # Multi-output model
+
+    
+    # @timed
    def predict(self, temp=1, rng_seed=None):
        """
        Generate a single SMILES string.

        If temp>0, multinomial sampling is used instead of selecting
        the single most probable character at each step.

        If temp==1, multinomial sampling without temperature scaling is used.

        NOTE(review): the original docstring referred to a "latent" input, but
        this unbiased model conditions on nothing — sampling starts from the
        start character only. Confirm against the biased DDC variant.

        Returns:
            A single SMILES string and its NLL — or None (implicitly) if the
            end character is not sampled within the hard cap of 1000 steps.
        """

        # Pass rng_seed for repeatable sampling
        if rng_seed is not None:
            np.random.seed(rng_seed)
        # Reset the states between predictions because RNN is stateful!
        self.sample_model.reset_states()

        # Prepare the input char
        startidx = self.smilesvec1._char_to_int[self.smilesvec1.startchar]
        samplevec = np.zeros((1, 1, self.smilesvec1.dims[-1]))
        samplevec[0, 0, startidx] = 1
        smiles = ""
        # Initialize Negative Log-Likelihood (NLL)
        NLL = 0
        # Loop and predict next char
        for i in range(1000):
            o = self.sample_model.predict(samplevec)
            # Multinomial sampling with temperature scaling
            if temp:
                temp = abs(temp)  # Handle negative values
                nextCharProbs = np.log(o) / temp
                nextCharProbs = np.exp(nextCharProbs)
                # Precedence note: this computes (p / p.sum()) - 1e-8, i.e. the
                # epsilon is subtracted elementwise AFTER normalization, so the
                # row sum lands slightly below 1.0 — which keeps
                # np.random.multinomial from rejecting float64 rounding error.
                nextCharProbs = (
                    nextCharProbs / nextCharProbs.sum() - 1e-8
                )  # Re-normalize for float64 to make exactly 1.0 for np.random.multinomial
                sampleidx = np.random.multinomial(
                    1, nextCharProbs.squeeze(), 1
                ).argmax()

            # Else, select the most probable character
            else:
                sampleidx = np.argmax(o)

            samplechar = self.smilesvec1._int_to_char[sampleidx]
            if samplechar != self.smilesvec1.endchar:
                # Append the new character
                smiles += samplechar
                samplevec = np.zeros((1, 1, self.smilesvec1.dims[-1]))
                samplevec[0, 0, sampleidx] = 1
                # Calculate negative log likelihood for the selected character given the sequence so far
                NLL -= np.log(o[0][0][sampleidx])
            else:
                return smiles, NLL
+
+    # @timed
    def predict_batch(self, temp=1, rng_seed=None):
        """
        Generate multiple random SMILES strings (batch_input_length at once).

        If temp>0, multinomial sampling is used instead of selecting
        the single most probable character at each step.

        If temp==1, multinomial sampling without temperature scaling is used.

        Low temp leads to elimination of characters with low conditional probabilities.

        Returns:
            (smiles, NLL) arrays of length batch_input_length — or None
            (implicitly) if not every sequence emits the end character within
            the hard cap of 1000 steps.
        """

        # Pass rng_seed for repeatable sampling
        if rng_seed is not None:
            np.random.seed(rng_seed)
        # Reset the states between predictions because RNN is stateful!
        self.multi_sample_model.reset_states()

        # Index of input char "^"
        startidx = self.smilesvec1._char_to_int[self.smilesvec1.startchar]
        # Vectorize the input char for all SMILES
        samplevec = np.zeros((self.batch_input_length, 1, self.smilesvec1.dims[-1]))
        samplevec[:, 0, startidx] = 1
        # Initialize arrays to store SMILES, their NLLs and their status
        smiles = np.array([""] * self.batch_input_length, dtype=object)
        NLL = np.zeros((self.batch_input_length,))
        finished = np.array([False] * self.batch_input_length)

        # Loop and predict next char
        for i in range(1000):
            o = self.multi_sample_model.predict(
                samplevec, batch_size=self.batch_input_length
            ).squeeze()

            # Multinomial sampling with temperature scaling
            if temp:
                temp = abs(temp)  # No negative values
                nextCharProbs = np.log(o) / temp
                nextCharProbs = np.exp(nextCharProbs)  # .squeeze()

                # Normalize probabilities row-wise; the 1e-8 is subtracted
                # after normalization so each row sums to slightly below 1.0
                # for np.random.multinomial (same trick as in predict())
                nextCharProbs = (nextCharProbs.T / nextCharProbs.sum(axis=1) - 1e-8).T
                sampleidc = np.asarray(
                    [
                        np.random.multinomial(1, nextCharProb, 1).argmax()
                        for nextCharProb in nextCharProbs
                    ]
                )

            else:
                sampleidc = np.argmax(o, axis=1)

            samplechars = [self.smilesvec1._int_to_char[idx] for idx in sampleidc]

            for idx, samplechar in enumerate(samplechars):
                if not finished[idx]:
                    if samplechar != self.smilesvec1.endchar:
                        # Append the SMILES with the next character
                        smiles[idx] += self.smilesvec1._int_to_char[sampleidc[idx]]
                        # NOTE(review): the full batch samplevec is rebuilt
                        # here for every unfinished sequence; each rebuild
                        # writes identical content, so this is redundant work
                        # (but behaviorally harmless).
                        samplevec = np.zeros(
                            (self.batch_input_length, 1, self.smilesvec1.dims[-1])
                        )
                        # One-Hot Encode the character
                        # samplevec[:,0,sampleidc] = 1
                        for count, sampleidx in enumerate(sampleidc):
                            samplevec[count, 0, sampleidx] = 1
                        # Calculate negative log likelihood for the selected character given the sequence so far
                        NLL[idx] -= np.log(o[idx][sampleidc[idx]])
                    else:
                        finished[idx] = True
                        # print("SMILES has finished at %i" %i)

            # If all SMILES are finished, i.e. the endchar "$" has been generated, stop the generation
            if finished.sum() == len(finished):
                return smiles, NLL
+
    @timed
    def get_smiles_nll(self, smiles_ref) -> float:
        """
        Calculate the NLL of a given SMILES string by teacher-forcing it
        through the stateful sample model one character at a time.

        NOTE(review): the original docstring mentioned "descriptors used as
        RNN states", but this unbiased model feeds no descriptors — confirm
        against the biased DDC variant. Presumably smiles_ref must be
        non-empty and shorter than 1000 characters; an empty string would
        raise IndexError at the first lookup.

        Returns:
            The NLL of sampling the given SMILES string.
        """

        # Reset the states between predictions because RNN is stateful!
        self.sample_model.reset_states()

        # Prepare the input char
        startidx = self.smilesvec1._char_to_int[self.smilesvec1.startchar]
        samplevec = np.zeros((1, 1, self.smilesvec1.dims[-1]))
        samplevec[0, 0, startidx] = 1

        # Initialize Negative Log-Likelihood (NLL)
        NLL = 0
        # Loop and predict next char
        for i in range(1000):
            o = self.sample_model.predict(samplevec)

            # Force the next character to be the reference character, rather
            # than sampling it from the model's distribution
            samplechar = smiles_ref[i]
            sampleidx = self.smilesvec1._char_to_int[samplechar]

            if i != len(smiles_ref) - 1:
                samplevec = np.zeros((1, 1, self.smilesvec1.dims[-1]))
                samplevec[0, 0, sampleidx] = 1
                # Calculate negative log likelihood for the selected character given the sequence so far
                NLL -= np.log(o[0][0][sampleidx])
            else:
                return NLL
+
+    @timed
+    def get_smiles_nll_batch(self, smiles_ref) -> list:
+        """
+        Calculate the individual NLL for a batch of known SMILES strings.
+        Batch size is equal to self.batch_input_length so reset it if needed.
+        
+        Returns:
+            NLL of sampling all listed SMILES.
+        """
+
+        # Reset the states between predictions because RNN is stateful!
+        self.multi_sample_model.reset_states()
+
+        # Index of input char "^"
+        startidx = self.smilesvec1._char_to_int[self.smilesvec1.startchar]
+        # Vectorize the input char for all SMILES
+        samplevec = np.zeros((self.batch_input_length, 1, self.smilesvec1.dims[-1]))
+        samplevec[:, 0, startidx] = 1
+        # Initialize arrays to store NLLs and flag if a SMILES is finished
+        NLL = np.zeros((self.batch_input_length,))
+        finished = np.array([False] * self.batch_input_length)
+
+        # Loop and predict next char
+        for i in range(1000):
+            o = self.multi_sample_model.predict(
+                samplevec, batch_size=self.batch_input_length
+            ).squeeze()
+            samplechars = []
+
+            for smiles in smiles_ref:
+                try:
+                    samplechars.append(smiles[i])
+                except:
+                    # This is a finished SMILES, so "i" exceeds dimensions
+                    samplechars.append("$")
+
+            sampleidc = np.asarray(
+                [self.smilesvec1._char_to_int[char] for char in samplechars]
+            )
+
+            for idx, samplechar in enumerate(samplechars):
+                if not finished[idx]:
+                    if i != len(smiles_ref[idx]) - 1:
+                        samplevec = np.zeros(
+                            (self.batch_input_length, 1, self.smilesvec1.dims[-1])
+                        )
+                        # One-Hot Encode the character
+                        for count, sampleidx in enumerate(sampleidc):
+                            samplevec[count, 0, sampleidx] = 1
+                        # Calculate negative log likelihood for the selected character given the sequence so far
+                        NLL[idx] -= np.log(o[idx][sampleidc[idx]])
+                    else:
+                        finished[idx] = True
+
+            # If all SMILES are finished, i.e. the endchar "$" has been generated, stop the generation
+            if finished.sum() == len(finished):
+                return NLL
+
+    def summary(self):
+        """
+        Echo the training configuration for inspection.
+        """
+
+        print(
+            "\nModel trained with dataset %s that has maxlen=%d and charset=%s for %d epochs."
+            % (self.dataset_name, self.maxlen, self.charset, self.epochs)
+        )
+
+        print(
+            "noise_std: %.6f, lstm_dim: %d, dec_layers: %d, td_dense_dim: %d, batch_size: %d, codelayer_dim: %d, lr: %.6f."
+            % (
+                self.noise_std,
+                self.lstm_dim,
+                self.dec_layers,
+                self.td_dense_dim,
+                self.batch_size,
+                self.codelayer_dim,
+                self.lr,
+            )
+        )
+
+    def get_graphs(self):
+        """
+        Export the graphs of the model and its submodels to png files.
+        Requires "pydot" and "graphviz" to be installed (pip install graphviz && pip install pydot).
+        """
+
+        try:
+            from keras.utils import plot_model
+            from keras.utils.vis_utils import model_to_dot
+
+            # from IPython.display import SVG
+
+            plot_model(self.model, to_file="model.png")
+
+            print("Model exported to png.")
+
+        except:
+            print("Check pydot and graphviz installation.")
+
+    @timed
+    def save(self, model_name):
+        """
+        Save model in a zip file.
+        """
+
+        with tempfile.TemporaryDirectory() as dirpath:
+
+            # Save the Keras model
+            self.model.save(dirpath + "/model.h5")
+
+            # Exclude unpicklable and unwanted attributes
+            excl_attr = [
+                "_DDC__mode",  # excluded because it is always identified within self.__init__()
+                "_DDC__train_gen",  # unpicklable
+                "_DDC__valid_gen",  # unpicklable
+                "_DDC__sample_model",  # unpicklable
+                "_DDC__multi_sample_model",  # unpicklable
+                "_DDC__model",
+            ]  # unpicklable
+
+            # Cannot deepcopy self.__dict__ because of Keras' thread lock so this is
+            # bypassed by popping and re-inserting the unpicklable attributes
+            to_add = {}
+            # Remove unpicklable attributes
+            for attr in excl_attr:
+                to_add[attr] = self.__dict__.pop(attr, None)
+
+            # Pickle metadata, i.e. almost everything but the Keras models and generators
+            pickle.dump(self.__dict__, open(dirpath + "/metadata.pickle", "wb"))
+
+            # Zip directory with its contents
+            shutil.make_archive(model_name, "zip", dirpath)
+
+            # Finally, re-load the popped elements for the model to be usable
+            for attr in excl_attr:
+                self.__dict__[attr] = to_add[attr]
+
+            print("Model saved.")