Diff of /datasets.py [000000] .. [c1a411]

Switch to side-by-side view

--- a
+++ b/datasets.py
@@ -0,0 +1,48 @@
+import h5py
+import math
+import pandas as pd
+from tensorflow.keras.utils import Sequence
+import numpy as np
+
+
+class ECGSequence(Sequence):
+    """Keras ``Sequence`` that serves batches from an HDF5 dataset.
+
+    Inputs are read lazily from a 3-D HDF5 dataset, one slice along the
+    first axis per batch.  Labels, when a CSV path is given, are loaded
+    fully into memory with pandas and sliced with the same row indices,
+    so row ``i`` of the CSV is taken to be the label of entry ``i`` of the
+    HDF5 dataset (NOTE(review): the two lengths are not cross-checked).
+
+    Only the index window ``[start_idx, end_idx)`` is exposed, which lets
+    several instances share one file (e.g. a train/validation split).
+    """
+
+    @classmethod
+    def get_train_and_val(cls, path_to_hdf5, hdf5_dset, path_to_csv, batch_size=8, val_split=0.02):
+        """Build a training and a validation sequence over the same files.
+
+        The first ``ceil(n_rows * (1 - val_split))`` rows (``n_rows`` being
+        the number of rows in the CSV) go to the training sequence; the
+        validation sequence starts right after and runs to the end of the
+        HDF5 dataset.
+
+        Args:
+            path_to_hdf5: Path of the HDF5 file holding the inputs.
+            hdf5_dset: Name of the dataset inside the HDF5 file.
+            path_to_csv: Path of the CSV file holding the labels.
+            batch_size: Number of entries per batch.
+            val_split: Fraction of rows reserved for validation, in [0, 1].
+
+        Returns:
+            A ``(train_seq, valid_seq)`` tuple of ``cls`` instances.
+
+        Raises:
+            ValueError: If ``val_split`` is not within [0, 1].
+        """
+        # Outside [0, 1] the split point falls outside the data and both
+        # sequences would end up with meaningless index windows.
+        if not 0 <= val_split <= 1:
+            raise ValueError("val_split must be in [0, 1], got {!r}".format(val_split))
+        n_samples = len(pd.read_csv(path_to_csv))
+        n_train = math.ceil(n_samples*(1-val_split))
+        train_seq = cls(path_to_hdf5, hdf5_dset, path_to_csv, batch_size, end_idx=n_train)
+        valid_seq = cls(path_to_hdf5, hdf5_dset, path_to_csv, batch_size, start_idx=n_train)
+        return train_seq, valid_seq
+
+    def __init__(self, path_to_hdf5, hdf5_dset, path_to_csv=None, batch_size=8,
+                 start_idx=0, end_idx=None):
+        """Open the HDF5 file and, optionally, load the labels.
+
+        Args:
+            path_to_hdf5: Path of the HDF5 file, opened read-only.
+            hdf5_dset: Name of the dataset inside the HDF5 file.
+            path_to_csv: Path of the label CSV, or ``None`` for an
+                unlabeled sequence that yields inputs only.
+            batch_size: Number of entries per batch; must be positive.
+            start_idx: Index of the first entry served.
+            end_idx: Index one past the last entry served; ``None`` means
+                the length of the HDF5 dataset.
+
+        Raises:
+            ValueError: If ``batch_size`` is not positive.
+        """
+        super().__init__()
+        # Define the handle before anything can fail so that __del__ is
+        # safe to run on a partially constructed instance.
+        self.f = None
+        if batch_size <= 0:
+            raise ValueError("batch_size must be positive, got {!r}".format(batch_size))
+        if path_to_csv is None:
+            self.y = None
+        else:
+            self.y = pd.read_csv(path_to_csv).values
+        # Get tracings
+        self.f = h5py.File(path_to_hdf5, "r")
+        self.x = self.f[hdf5_dset]
+        self.batch_size = batch_size
+        if end_idx is None:
+            end_idx = len(self.x)
+        self.start_idx = start_idx
+        self.end_idx = end_idx
+
+    @property
+    def n_classes(self):
+        """Number of label columns in the CSV.
+
+        Raises:
+            AttributeError: If the sequence was built without a label CSV.
+        """
+        if self.y is None:
+            raise AttributeError("n_classes is undefined: no label CSV was provided")
+        return self.y.shape[1]
+
+    def __getitem__(self, idx):
+        """Return batch number ``idx``.
+
+        Negative indices count from the last batch.  The final batch may
+        hold fewer than ``batch_size`` entries.
+
+        Returns:
+            A NumPy array with the inputs when there are no labels,
+            otherwise an ``(inputs, labels)`` tuple of NumPy arrays.
+
+        Raises:
+            IndexError: If ``idx`` is outside the range of batches.
+        """
+        n_batches = self.__len__()
+        if idx < 0:
+            idx += n_batches
+        # Without this check an out-of-range index would silently return
+        # an empty batch or data from outside the configured window.
+        if not 0 <= idx < n_batches:
+            raise IndexError("batch index out of range")
+        start = self.start_idx + idx * self.batch_size
+        end = min(start + self.batch_size, self.end_idx)
+        if self.y is None:
+            return np.array(self.x[start:end, :, :])
+        else:
+            return np.array(self.x[start:end, :, :]), np.array(self.y[start:end])
+
+    def __len__(self):
+        """Return the number of batches in the ``[start_idx, end_idx)`` window."""
+        # Clamp at zero: an empty or inverted window has no batches, and
+        # len() rejects negative values.
+        return max(0, math.ceil((self.end_idx - self.start_idx) / self.batch_size))
+
+    def __del__(self):
+        """Close the HDF5 file, if one was opened."""
+        f = getattr(self, "f", None)
+        if f is not None:
+            f.close()