|
a |
|
b/examples/mitdb/extract.py |
|
|
1 |
from __future__ import print_function |
|
|
2 |
|
|
|
3 |
import cPickle as pickle |
|
|
4 |
import glob |
|
|
5 |
import numpy as np |
|
|
6 |
import os |
|
|
7 |
import subprocess |
|
|
8 |
|
|
|
9 |
# Directory holding the WFDB command-line tools; 'rdsamp' and 'rdann' are
# resolved relative to it.
WFDB = "/deep/group/med/tools/wfdb-10.5.24/build/bin/"
# Directory the tools are run in and where the *.dat records are globbed from;
# the extracted arrays and pickled labels are written here as well.
DATA = "/deep/group/med/mitdb"
|
|
11 |
|
|
|
12 |
def extract_wave(idx, num_channels=2):
    """
    Reads the .dat file of record `idx` through rdsamp and returns the samples
    in a numpy array.

    idx: record name passed to `rdsamp -r`; rdsamp is run with DATA as its
        working directory.
    num_channels: number of signal channels in the record. Defaults to 2,
        which gives the n x 3 layout this function has always returned.

    The returned int32 array is n x (num_channels + 1) where n is the number
    of samples. The first column is the sample number and the remaining
    columns are the channels in order.

    Raises subprocess.CalledProcessError if rdsamp exits with a non-zero
    status, and ValueError if the number of parsed values is not a multiple
    of num_channels + 1.
    """
    rdsamp = os.path.join(WFDB, 'rdsamp')
    output = subprocess.check_output([rdsamp, '-r', idx], cwd=DATA)
    # With a whitespace separator fromstring parses the text as one flat run
    # of numbers; the row structure is restored by the reshape below.
    data = np.fromstring(output, dtype=np.int32, sep=' ')
    return data.reshape((-1, num_channels + 1))
|
|
23 |
|
|
|
24 |
def extract_annotation(idx):
    """
    Runs rdann on the 'atr' annotations of record `idx` and parses its output.

    The annotation file column names are:
        Time, Sample #, Type, Sub, Chan, Num, Aux
    The Aux is optional, it could be left empty. Type is the beat type and Aux
    is the transition label.

    Returns a list of (time, sample, type, aux) tuples, one per annotation
    row. `sample` is converted to int; `aux` is None when the row does not
    have exactly seven whitespace-separated fields. An empty list is returned
    when rdann prints nothing.

    Raises subprocess.CalledProcessError if rdann exits with a non-zero
    status.
    """
    rdann = os.path.join(WFDB, 'rdann')
    output = subprocess.check_output([rdann, '-r', idx, '-a', 'atr'], cwd=DATA)
    labels = []
    for line in output.strip().split("\n"):
        fields = line.split()
        if not fields:
            # Empty output splits into a single empty line; indexing into it
            # would raise IndexError, so blank rows are skipped.
            continue
        aux = fields[6] if len(fields) == 7 else None
        labels.append((fields[0], int(fields[1]), fields[2], aux))
    return labels
|
|
37 |
|
|
|
38 |
def extract(idx):
    """
    Loads both the waveform and the annotations of record `idx`.

    Returns a (data, labels) pair: the numpy array produced by extract_wave
    followed by the list of tuples produced by extract_annotation.
    """
    # Tuple elements are evaluated left to right, so the waveform is still
    # read before the annotations.
    return extract_wave(idx), extract_annotation(idx)
|
|
46 |
|
|
|
47 |
def save(example, idx):
    """
    Saves data with numpy.save (load with numpy.load) and pickles labels. The
    files are saved in the same place as the raw data.

    example: sequence whose first item is the data array and whose second
        item is the labels object to pickle.
    idx: record name used as the stem of both output files.

    Writes DATA/<idx>.npy (numpy.save appends the '.npy' extension) and
    DATA/<idx>.pkl.
    """
    np.save(os.path.join(DATA, idx), example[0])
    # Pickle output is a byte stream, so the file must be opened in binary
    # mode: text mode fails on Python 3 and can corrupt the stream on
    # platforms that translate newlines.
    with open(os.path.join(DATA, "{}.pkl".format(idx)), 'wb') as fid:
        pickle.dump(example[1], fid)
|
|
55 |
|
|
|
56 |
if __name__ == "__main__":
    # Each *.dat file in DATA names one record; the record id is the part of
    # the file name before the first dot.
    for path in glob.glob(os.path.join(DATA, "*.dat")):
        idx = os.path.basename(path).partition(".")[0]
        save(extract(idx), idx)
        print("Example {}".format(idx))