--- a
+++ b/py_scripts/lmdb_utils.py
@@ -0,0 +1,74 @@
+import os
+import pickle
+import lmdb
+import selfies as sf
+from tqdm import tqdm, trange
+
+
+
+
+
+
+
+def read_lmdb(lmdb_path):
+    """Load every record stored in a single-file LMDB database.
+
+    Args:
+        lmdb_path: Path of the LMDB data file. It is opened with
+            ``subdir=False``, read-only and without locking.
+
+    Returns:
+        list: One unpickled object per key, in LMDB key order. LMDB compares
+        keys bytewise, so ``b"10"`` comes before ``b"2"``; the list order
+        therefore need not match the numeric order of the keys.
+
+    Raises:
+        lmdb.Error: Propagated from lmdb if the database cannot be opened
+            or read.
+    """
+    env = lmdb.open(
+        lmdb_path,
+        subdir=False,
+        readonly=True,
+        lock=False,
+        readahead=False,
+        meminit=False,
+        max_readers=256,
+    )
+    # Entering ``env`` guarantees the environment is closed, and the read
+    # transaction ended, even if a record fails to load.
+    with env, env.begin() as txn:
+        # Materialise the keys first so tqdm knows the total for its bar.
+        keys = list(txn.cursor().iternext(values=False))
+        # NOTE(security): pickle.loads can execute arbitrary code; only read
+        # databases that come from a trusted source.
+        return [pickle.loads(txn.get(key)) for key in tqdm(keys)]
+
+
+
+def write_lmdb(out_list, save_path, map_size=1099511627776):
+    """Pickle a sequence of objects into a single-file LMDB database.
+
+    Args:
+        out_list: Iterable of picklable objects. Item ``i`` is stored under
+            the ASCII key ``str(i)``.
+        save_path: Path of the LMDB data file to write (``subdir=False``).
+        map_size: Maximum size in bytes the database may grow to. Defaults
+            to 1 TiB (``1099511627776``).
+
+    Returns:
+        None. All items are written inside one write transaction, which is
+        committed when the loop finishes without error.
+    """
+    env = lmdb.open(
+        save_path,
+        subdir=False,
+        lock=False,
+        readahead=False,
+        meminit=False,
+        max_readers=64,
+        map_size=map_size,
+    )
+    # Entering ``env`` closes it on exit; otherwise the handle leaks and the
+    # same file cannot safely be opened again in this process.
+    with env, env.begin(write=True) as lmdb_txn:
+        for i, item in enumerate(tqdm(out_list)):
+            lmdb_txn.put(str(i).encode('ascii'), pickle.dumps(item))
+
+
+
+if __name__ == "__main__":
+    # Example usage: load a database and show the fields of its first record.
+    lmdb_path = "./data/train_no_test_af/train.lmdb"
+    data = read_lmdb(lmdb_path)
+    if data:
+        print(data[0].keys())
+    else:
+        # Indexing data[0] on an empty database would raise IndexError.
+        print(f"No records found in {lmdb_path}")
+
+    # Human-readable description of the record fields, kept for reference.
+    # Nothing in this script reads it at runtime.
+    dic = {
+        "atoms": "atom types for each atom in the ligand",
+        "coordinates": "3D coordinates for each atom in the ligand generated by RDKit. Max number of conformations is 10",
+        "pocket_atoms": "atom types for each atom in the pocket",
+        "pocket_coordinates": "3D coordinates for each atom in the pocket",
+        "mol": "RDKit molecule object for the ligand",
+        "smi": "SMILES string for the ligand",
+        "pocket": "pdbid of the pocket",
+    }
+
+    
+
+    
\ No newline at end of file