Diff of /datasets.py [000000] .. [c1a411]

Switch to unified view

a b/datasets.py
1
import h5py
2
import math
3
import pandas as pd
4
from tensorflow.keras.utils import Sequence
5
import numpy as np
6
7
8
class ECGSequence(Sequence):
    """Keras ``Sequence`` that serves batches of tracings from an HDF5 dataset.

    Tracings are read lazily, one batch at a time, from the dataset
    ``hdf5_dset`` inside the file ``path_to_hdf5``. The dataset is indexed
    as ``x[start:end, :, :]``, so it must be 3-dimensional with samples along
    the first axis. When ``path_to_csv`` is given, the CSV is loaded fully
    into memory and row ``i`` of the CSV is used as the target for sample
    ``i`` of the HDF5 dataset.

    The HDF5 file stays open for the lifetime of the object and is closed
    when the object is garbage collected.
    """

    @classmethod
    def get_train_and_val(cls, path_to_hdf5, hdf5_dset, path_to_csv, batch_size=8, val_split=0.02):
        """Build a (train, validation) pair of sequences over the same data.

        The split is positional: the first
        ``ceil(n_samples * (1 - val_split))`` samples go to training and the
        remaining ones to validation, where ``n_samples`` is the number of
        rows in the CSV file.

        Args:
            path_to_hdf5: Path to the HDF5 file holding the tracings.
            hdf5_dset: Name of the dataset inside the HDF5 file.
            path_to_csv: Path to the CSV file holding the targets.
            batch_size: Number of samples per batch.
            val_split: Fraction of the samples reserved for validation.

        Returns:
            Tuple ``(train_seq, valid_seq)`` of ``cls`` instances.
        """
        n_samples = len(pd.read_csv(path_to_csv))
        n_train = math.ceil(n_samples * (1 - val_split))
        train_seq = cls(path_to_hdf5, hdf5_dset, path_to_csv, batch_size, end_idx=n_train)
        # No end_idx here: the validation sequence runs up to the length of
        # the HDF5 dataset (see __init__), not up to the CSV row count.
        valid_seq = cls(path_to_hdf5, hdf5_dset, path_to_csv, batch_size, start_idx=n_train)
        return train_seq, valid_seq

    def __init__(self, path_to_hdf5, hdf5_dset, path_to_csv=None, batch_size=8,
                 start_idx=0, end_idx=None):
        """Open the HDF5 dataset and, optionally, load the targets.

        Args:
            path_to_hdf5: Path to the HDF5 file holding the tracings.
            hdf5_dset: Name of the dataset inside the HDF5 file.
            path_to_csv: Path to the CSV file holding the targets, or
                ``None`` to build a sequence that yields inputs only.
            batch_size: Number of samples per batch.
            start_idx: Index of the first sample served by this sequence.
            end_idx: Index one past the last sample served by this sequence;
                ``None`` means the length of the HDF5 dataset.
        """
        # Newer Keras versions expect the base-class initializer to run (and
        # warn when it does not); on older versions this is a harmless no-op.
        super().__init__()
        if path_to_csv is None:
            self.y = None
        else:
            self.y = pd.read_csv(path_to_csv).values
        # Get tracings
        self.f = h5py.File(path_to_hdf5, "r")
        self.x = self.f[hdf5_dset]
        self.batch_size = batch_size
        if end_idx is None:
            end_idx = len(self.x)
        self.start_idx = start_idx
        self.end_idx = end_idx

    @property
    def n_classes(self):
        """Number of target columns in the CSV file.

        Raises:
            AttributeError: If the sequence was built without a CSV file.
        """
        if self.y is None:
            # Same exception type the bare ``None.shape`` access used to
            # raise, but with a message that explains the actual cause.
            raise AttributeError(
                "n_classes is undefined: this sequence was created without path_to_csv"
            )
        return self.y.shape[1]

    def __getitem__(self, idx):
        """Return batch number ``idx``.

        Returns:
            ``x_batch`` when no targets were loaded, otherwise the tuple
            ``(x_batch, y_batch)``. The last batch may hold fewer than
            ``batch_size`` samples.
        """
        start = self.start_idx + idx * self.batch_size
        # Clip so the final batch never reads past this sequence's range.
        end = min(start + self.batch_size, self.end_idx)
        if self.y is None:
            return np.array(self.x[start:end, :, :])
        else:
            return np.array(self.x[start:end, :, :]), np.array(self.y[start:end])

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        return math.ceil((self.end_idx - self.start_idx) / self.batch_size)

    def __del__(self):
        """Close the HDF5 file, if it was ever opened."""
        # __del__ also runs when __init__ raised before ``self.f`` was
        # assigned (e.g. the CSV could not be read); guard against that so
        # the finalizer does not raise a second, misleading AttributeError.
        f = getattr(self, "f", None)
        if f is not None:
            f.close()