|
a |
|
b/datasets.py |
|
|
1 |
import h5py |
|
|
2 |
import math |
|
|
3 |
import pandas as pd |
|
|
4 |
from tensorflow.keras.utils import Sequence |
|
|
5 |
import numpy as np |
|
|
6 |
|
|
|
7 |
|
|
|
8 |
class ECGSequence(Sequence):
    """Batch-wise reader over an HDF5 dataset with optional CSV targets.

    Inputs are read lazily from the HDF5 dataset, one batch per
    ``__getitem__`` call; the dataset is indexed with three axes. Targets,
    when a CSV path is given, are loaded fully into memory as the ``.values``
    array of the parsed CSV.

    The sequence only exposes rows in the half-open range
    ``[start_idx, end_idx)``, which is how the train/validation split in
    ``get_train_and_val`` is implemented.
    """

    @classmethod
    def get_train_and_val(cls, path_to_hdf5, hdf5_dset, path_to_csv,
                          batch_size=8, val_split=0.02):
        """Build a training and a validation sequence over the same files.

        The first ``ceil(n_samples * (1 - val_split))`` rows go to the
        training sequence, where ``n_samples`` is the number of rows in the
        CSV; the validation sequence starts at that row and runs to the end
        of the HDF5 dataset.

        Args:
            path_to_hdf5: Path of the HDF5 file holding the inputs.
            hdf5_dset: Name of the dataset inside the HDF5 file.
            path_to_csv: Path of the CSV file holding the targets.
            batch_size: Number of rows per batch.
            val_split: Fraction of the rows reserved for validation.

        Returns:
            A ``(train_seq, valid_seq)`` tuple of instances of ``cls``.
        """
        n_samples = len(pd.read_csv(path_to_csv))
        n_train = math.ceil(n_samples * (1 - val_split))
        train_seq = cls(path_to_hdf5, hdf5_dset, path_to_csv, batch_size,
                        end_idx=n_train)
        valid_seq = cls(path_to_hdf5, hdf5_dset, path_to_csv, batch_size,
                        start_idx=n_train)
        return train_seq, valid_seq

    def __init__(self, path_to_hdf5, hdf5_dset, path_to_csv=None, batch_size=8,
                 start_idx=0, end_idx=None):
        """Open the HDF5 file and, if requested, load the targets.

        Args:
            path_to_hdf5: Path of the HDF5 file holding the inputs.
            hdf5_dset: Name of the dataset inside the HDF5 file.
            path_to_csv: Path of the CSV file holding the targets, or
                ``None`` to yield inputs only.
            batch_size: Number of rows per batch.
            start_idx: First row (inclusive) exposed by this sequence.
            end_idx: Last row (exclusive) exposed by this sequence; defaults
                to the length of the HDF5 dataset.
        """
        # Run the base-class initializer: recent Keras versions warn when a
        # Sequence subclass skips it; on older versions this is a no-op.
        super().__init__()
        if path_to_csv is None:
            self.y = None
        else:
            self.y = pd.read_csv(path_to_csv).values
        # Get tracings
        self.f = h5py.File(path_to_hdf5, "r")
        self.x = self.f[hdf5_dset]
        self.batch_size = batch_size
        if end_idx is None:
            end_idx = len(self.x)
        self.start_idx = start_idx
        self.end_idx = end_idx

    @property
    def n_classes(self):
        """Number of target columns (second dimension of the target array).

        Raises:
            AttributeError: If the sequence was built without a CSV file,
                because there is no target array to inspect.
        """
        return self.y.shape[1]

    def __getitem__(self, idx):
        """Return batch number ``idx``.

        The batch covers rows ``start_idx + idx * batch_size`` up to, but not
        including, the smaller of that start plus ``batch_size`` and
        ``end_idx``, so the final batch may hold fewer rows.

        Returns:
            The input array alone when no targets were loaded, otherwise an
            ``(inputs, targets)`` tuple sliced with the same row range.
        """
        start = self.start_idx + idx * self.batch_size
        end = min(start + self.batch_size, self.end_idx)
        if self.y is None:
            return np.array(self.x[start:end, :, :])
        else:
            return np.array(self.x[start:end, :, :]), np.array(self.y[start:end])

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        return math.ceil((self.end_idx - self.start_idx) / self.batch_size)

    def __del__(self):
        """Close the HDF5 file when the sequence is garbage-collected."""
        # ``f`` is missing when __init__ raised before the file was opened
        # (or was bypassed entirely); the finalizer must not add a second,
        # misleading AttributeError in that case.
        f = getattr(self, "f", None)
        if f is not None:
            f.close()