[bbafc7]: / ecg / examples / cinc17 / build_datasets.py

Download this file

56 lines (44 with data), 1.4 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import json
import numpy as np
import os
import random
import scipy.io as sio
import tqdm
# Number of ECG samples covered by one label: load_all emits
# ecg.shape[0] // STEP labels per record.
STEP = 256
def load_ecg_mat(ecg_file):
    """Read one ECG recording from a MATLAB ``.mat`` file.

    Args:
        ecg_file: Path to the ``.mat`` file; it must contain a ``val``
            variable.

    Returns:
        The ``val`` array with all length-one axes removed.
    """
    mat_contents = sio.loadmat(ecg_file)
    signal = mat_contents['val']
    return signal.squeeze()
def load_all(data_path):
    """Build the list of (ECG file path, per-step labels) examples.

    Reads ``REFERENCE-v3.csv`` from the parent directory of ``data_path``.
    Every non-blank row is expected to be ``record,label``. For each row
    the matching ``<record>.mat`` file is loaded to find out how many
    labels the recording needs.

    Args:
        data_path: Directory that holds the ``<record>.mat`` files.

    Returns:
        A list of ``(absolute_mat_path, labels)`` tuples, where ``labels``
        repeats the record's label once per complete ``STEP`` samples.
    """
    label_file = os.path.join(data_path, "../REFERENCE-v3.csv")
    with open(label_file, 'r') as fid:
        # Skip blank lines (e.g. an empty line at the end of the CSV);
        # they would otherwise break the two-field unpacking below.
        records = [line.strip().split(",") for line in fid if line.strip()]

    dataset = []
    for record, label in tqdm.tqdm(records):
        ecg_file = os.path.abspath(os.path.join(data_path, record + ".mat"))
        ecg = load_ecg_mat(ecg_file)
        # One label per complete STEP-sample window; a partial tail gets none.
        num_labels = ecg.shape[0] // STEP
        dataset.append((ecg_file, [label] * num_labels))
    return dataset
def split(dataset, dev_frac):
    """Randomly partition ``dataset`` into train and dev subsets.

    Args:
        dataset: List of examples. It is left unmodified.
        dev_frac: Fraction of the examples assigned to dev; the dev size
            is ``int(dev_frac * len(dataset))`` (truncated).

    Returns:
        A ``(train, dev)`` tuple of lists.
    """
    dev_cut = int(dev_frac * len(dataset))
    # Shuffle a copy so the caller's list keeps its order; for a given
    # random state the permutation matches an in-place shuffle.
    shuffled = list(dataset)
    random.shuffle(shuffled)
    return shuffled[dev_cut:], shuffled[:dev_cut]
def make_json(save_path, dataset):
    """Write ``dataset`` to ``save_path`` as one JSON object per line.

    Args:
        save_path: Output file path; an existing file is overwritten.
        dataset: Iterable of indexable examples whose item 0 is stored
            under ``'ecg'`` and item 1 under ``'labels'``.
    """
    with open(save_path, 'w') as out:
        for example in dataset:
            record = {'ecg': example[0], 'labels': example[1]}
            out.write(json.dumps(record) + '\n')
from pathlib import Path
if __name__ == "__main__":
    # Seed before shuffling so the train/dev partition is repeatable for
    # the same reference file.
    random.seed(2018)

    # Recordings are looked up under the current user's home directory.
    data_path = str(Path.home()) + "/ecg/examples/cinc17/data/training2017/"

    # Hold out 10% of the records for dev; both files land in the
    # current working directory.
    train, dev = split(load_all(data_path), 0.1)
    for save_path, subset in (("train.json", train), ("dev.json", dev)):
        make_json(save_path, subset)