Switch to unified view

a b/scripts/annotation-generation-2d.py
1
import json
2
import os.path as osp
3
from glob import glob
4
5
import pandas as pd
6
7
# Beat annotation symbols kept in the generated annotation files:
# 1. N - Normal
# 2. V - PVC (Premature ventricular contraction)
# 3. \ - PAB (Paced beat)
# 4. R - RBB (Right bundle branch)
# 5. L - LBB (Left bundle branch)
# 6. A - APB (Atrial premature beat)
# 7. ! - AFW (Ventricular flutter wave)
# 8. E - VEB (Ventricular escape beat)
classes = ["N", "V", "\\", "R", "L", "A", "!", "E"]

# Only samples recorded from this lead are kept.
lead = "MLII"

extension = "png"  # or `npy` for 1D

# Glob pattern for the samples; the last four components are
# <name>/<lead>/<label>/<filename>.<extension>.
data_path = osp.abspath("../data/*/*/*/*/*.{}".format(extension))

# Fraction of every class that goes to the validation split, in [0, 1].
val_size = 0.1

# Directory the annotation files are written to: `data_path` with its five
# wildcard components removed.  `abspath` returns the path with the OS
# separator, so split on `osp.sep` rather than a hard-coded "/".
output_path = osp.sep.join(data_path.split(osp.sep)[:-5])

# Seed used for shuffling and for the per-class validation sampling.
random_state = 7
24
25
def parse_sample_path(path):
    """Describe one sample file by the trailing components of its path.

    The path is expected to end in ``<name>/<lead>/<label>/<filename>``.

    Args:
        path: Path of a sample file, using the OS path separator.

    Returns:
        A dict with the keys ``name``, ``lead``, ``label``, ``filename``
        (the file name without its extension) and ``path`` (the input,
        unchanged).

    Raises:
        ValueError: If the path has fewer than four components.
    """
    # The lead is unpacked into `file_lead` on purpose: reusing the name
    # `lead` would overwrite the module-level lead that the dataset is
    # filtered by later on.
    *_, name, file_lead, label, filename = path.split(osp.sep)
    return {
        "name": name,
        "lead": file_lead,
        "label": label,
        "filename": osp.splitext(filename)[0],
        "path": path,
    }


def split_train_val(data, labels, frac, seed):
    """Split ``data`` into train and validation frames, class by class.

    Args:
        data: DataFrame with a ``label`` column and a unique index.
        labels: Labels to draw validation rows for.
        frac: Fraction of each label's rows that goes to validation.
        seed: Random state passed to every per-label ``sample`` call.

    Returns:
        A ``(train, val)`` tuple; ``train`` holds every row of ``data``
        whose index is not in ``val``.
    """
    val_ids = []
    for cl in labels:
        # Sampling each class separately keeps the class proportions of the
        # validation split equal to those of the full dataset.
        val_ids.extend(
            data[data["label"] == cl].sample(frac=frac, random_state=seed).index,
        )

    val = data.loc[val_ids, :]
    train = data[~data.index.isin(val.index)]
    return train, val


def main():
    """Write ``train.json``, ``val.json`` and ``class-mapper.json``.

    Samples matching ``data_path`` are collected, restricted to the
    configured ``lead`` and ``classes``, shuffled, and split per class
    into train and validation sets.  All three files are written to
    ``output_path``.

    Raises:
        FileNotFoundError: If no file matches ``data_path``.
    """
    records = [parse_sample_path(path) for path in glob(data_path)]
    if not records:
        # An empty frame has no "lead" column, so the filter below would
        # otherwise fail with an uninformative KeyError.
        raise FileNotFoundError(
            "no samples found matching {}".format(data_path),
        )

    data = pd.DataFrame(records)
    data = data[data["lead"] == lead]
    data = data[data["label"].isin(classes)]
    data = data.sample(frac=1, random_state=random_state)

    train, val = split_train_val(data, classes, val_size, random_state)

    train.to_json(osp.join(output_path, "train.json"), orient="records")
    val.to_json(osp.join(output_path, "val.json"), orient="records")

    # Class ids follow the order in which labels first appear in the
    # shuffled training set.
    class_mapper = {
        label: index for index, label in enumerate(train.label.unique())
    }

    with open(osp.join(output_path, "class-mapper.json"), "w") as mapper_file:
        json.dump(class_mapper, mapper_file, indent=1)


if __name__ == "__main__":
    main()