--- a
+++ b/examples/vae_example/data_loader.py
@@ -0,0 +1,75 @@
+"""
+Helpers that convert SMILES and SELFIES strings into one-hot encodings.
+"""
+
+import numpy as np
+
+import selfies as sf
+
+
+def smile_to_hot(smile, largest_smile_len, alphabet):
+    """Return (integer codes, one-hot array) for a single SMILES string.
+
+    smile is right-padded with ' ' to largest_smile_len characters (so the
+    alphabet must contain ' ' when padding occurs); a longer smile is not
+    truncated.  A character missing from alphabet raises KeyError.  The
+    array has shape (len(codes), len(alphabet)) with a single 1 per row.
+    """
+    char_to_int = {char: index for index, char in enumerate(alphabet)}
+
+    # a non-positive repeat count yields '', so long inputs stay unpadded
+    padded = smile + ' ' * (largest_smile_len - len(smile))
+    integer_encoded = [char_to_int[char] for char in padded]
+
+    # allocate the 2-D array up front so empty input keeps its column axis
+    onehot_encoded = np.zeros((len(padded), len(alphabet)), dtype=int)
+    rows = np.arange(len(padded))
+    onehot_encoded[rows, np.array(integer_encoded, dtype=int)] = 1
+    return integer_encoded, onehot_encoded
+
+
+def multiple_smile_to_hot(smiles_list, largest_molecule_len, alphabet):
+    """One-hot encode every SMILES string in smiles_list.
+
+    Each string goes through smile_to_hot with the same length and alphabet;
+    the one-hot arrays (not the integer codes) are stacked by np.array.
+    """
+    encodings = [
+        smile_to_hot(smile, largest_molecule_len, alphabet)[1]
+        for smile in smiles_list
+    ]
+    return np.array(encodings)
+
+
+def selfies_to_hot(selfie, largest_selfie_len, alphabet):
+    """Return (integer codes, one-hot array) for a single SELFIES string.
+
+    selfie is right-padded with (largest_selfie_len - sf.len_selfies(selfie))
+    copies of '[nop]' (so the alphabet must contain '[nop]' when padding
+    occurs); a longer selfie is not truncated.  The padded string is split
+    with sf.split_selfies and a symbol missing from alphabet raises
+    KeyError.  The array has shape (len(codes), len(alphabet)) with a
+    single 1 per row.
+    """
+    symbol_to_int = {symbol: index for index, symbol in enumerate(alphabet)}
+
+    # a non-positive repeat count yields '', so long inputs stay unpadded
+    padded = selfie + '[nop]' * (largest_selfie_len - sf.len_selfies(selfie))
+    integer_encoded = [symbol_to_int[s] for s in sf.split_selfies(padded)]
+
+    # allocate the 2-D array up front so empty input keeps its column axis
+    onehot_encoded = np.zeros((len(integer_encoded), len(alphabet)), dtype=int)
+    rows = np.arange(len(integer_encoded))
+    onehot_encoded[rows, np.array(integer_encoded, dtype=int)] = 1
+    return integer_encoded, onehot_encoded
+
+
+def multiple_selfies_to_hot(selfies_list, largest_molecule_len, alphabet):
+    """Stack the one-hot encodings of all strings in selfies_list.
+    """
+
+    encodings = [
+        selfies_to_hot(selfie, largest_molecule_len, alphabet)[1]
+        for selfie in selfies_list
+    ]
+    return np.array(encodings)