--- a/tensorflow_impl/fetch_data.py
+++ b/tensorflow_impl/fetch_data.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python3
+
+import argparse
+import csv
+import errno
+import os
+import subprocess
+import sys
+
+par = argparse.ArgumentParser(description="Download and process PhysioNet datasets")
+
+par.add_argument("-dl", nargs="+",
+                 dest="dataset_list",
+                 default=[],
+                 choices=["nsrdb", "apnea-ecg", "mitdb", "afdb", "svdb"],
+                 help="The list of datasets to download")
+
+args = par.parse_args()
+dataset_list = args.dataset_list
+
+
+def fetch_data():
+    """
+    Download the selected PhysioNet databases with rdsamp:
+        nsrdb       normal sinus rhythm
+        apnea-ecg   sleep apnea
+        mitdb       arrhythmia
+        afdb        atrial fibrillation
+        svdb        supraventricular arrhythmia
+    """
+
+    physionet = {
+        "nsrdb": ["16265", "16272", "16273", "16420", "16483", "16539", "16773",
+                  "16786", "16795", "17052", "17453", "18177", "18184", "19088",
+                  "19090", "19093", "19140", "19830"],
+        "apnea-ecg": ["a01", "a01er", "a01r", "a02", "a02er", "a02r", "a03",
+                      "a03er", "a03r", "a04", "a04er", "a04r", "a05", "a06",
+                      "a07", "a08", "a09", "a10", "a11", "a12", "a13", "a14",
+                      "a15", "a16", "a17", "a18", "a19", "a20", "b01", "b01er",
+                      "b01r", "b02", "b03", "b04", "b05", "c01", "c01er", "c01r",
+                      "c02", "c02er", "c02r", "c03", "c03er", "c03r", "c04",
+                      "c05", "c06", "c07", "c08", "c09", "c10", "x01", "x02",
+                      "x03", "x04", "x05", "x06", "x07", "x08", "x09", "x10",
+                      "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18",
+                      "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26",
+                      "x27", "x28", "x29", "x30", "x31", "x32", "x33", "x34", "x35"],
+        "mitdb": ["100", "101", "102", "103", "104", "105", "106", "107", "108",
+                  "109", "111", "112", "113", "114", "115", "116", "117", "118",
+                  "119", "121", "122", "123", "124", "200", "201", "202", "203",
+                  "205", "207", "208", "209", "210", "212", "213", "214", "215",
+                  "217", "219", "220", "221", "222", "223", "228", "230", "231",
+                  "232", "233", "234"],
+        "afdb": ["04015", "04043", "04048", "04126", "04746", "04908", "04936",
+                 "05091", "05121", "05261", "06426", "06453", "06995", "07162",
+                 "07859", "07879", "07910", "08215", "08219", "08378", "08405",
+                 "08434", "08455"],
+        "svdb": ["800", "801", "802", "803", "804", "805", "806", "807", "808",
+                 "809", "810", "811", "812", "820", "821", "822", "823", "824",
+                 "825", "826", "827", "828", "829", "840", "841", "842", "843",
+                 "844", "845", "846", "847", "848", "849", "850", "851", "852",
+                 "853", "854", "855", "856", "857", "858", "859", "860", "861",
+                 "862", "863", "864", "865", "866", "867", "868", "869", "870",
+                 "871", "872", "873", "874", "875", "876", "877", "878", "879",
+                 "880", "881", "882", "883", "884", "885", "886", "887", "888",
+                 "889", "890", "891", "892", "893", "894"]
+    }
+
+    dataset_dir = "datasets/raws"
+
+    def check_folder_existence():
+        # Create the raw dataset directory and one sub-directory per database.
+        if not os.path.isdir(dataset_dir):
+            print("Directory {} not found".format(dataset_dir))
+            print("Creating now...")
+            os.makedirs(dataset_dir)
+
+        for database in physionet:
+            folder = os.path.join(dataset_dir, database)
+            if not os.path.isdir(folder):
+                print("Directory {} not found".format(folder))
+                print("Creating now...")
+                os.makedirs(folder)
+
+    def rdsamp_installed():
+        # rdsamp (from the WFDB Software Package) is required to export records as CSV.
+        try:
+            subprocess.call(["rdsamp", "-h"], stdout=subprocess.DEVNULL,
+                            stderr=subprocess.DEVNULL)
+            return True
+        except OSError as e:
+            if e.errno == errno.ENOENT:
+                print("rdsamp is not installed; see the installation guide linked in the README")
+                return False
+
+        print("rdsamp installation check failed")
+        return False
+
+    def remove_unwanted_datasets():
+        # If -dl was given, keep only the requested databases.
+        if dataset_list:
+            unwanted_ds = physionet.keys() - dataset_list
+            for ds in unwanted_ds:
+                physionet.pop(ds, None)
+
+    remove_unwanted_datasets()
+    check_folder_existence()
+    if not rdsamp_installed():
+        sys.exit(1)
+
+    for database, samples in physionet.items():
+        print("Downloading {}".format(database))
+        database_dir = os.path.join(dataset_dir, database)
+        for sample in samples:
+            csv_file_path = os.path.join(database_dir, sample) + ".csv"
+            if os.path.exists(csv_file_path):
+                print("File {} exists. Skipping download...".format(csv_file_path))
+                continue
+
+            # Export the first 60 seconds of the record as CSV
+            # (column headings, physical units, elapsed-time timestamps).
+            sample_path = os.path.join(database, sample)
+            cmd = ("rdsamp -r {} -c -H -f 0"
+                   " -t 60 -v -pe > {}").format(sample_path, csv_file_path)
+            try:
+                print("Downloading with command {}...".format(cmd))
+                subprocess.check_call(cmd, shell=True)
+            except Exception as e:
+                print("Failed to execute command: {} with exception: {}".format(cmd, e))
+                # The shell redirection may have left an empty or partial file behind.
+                if os.path.exists(csv_file_path):
+                    os.remove(csv_file_path)
+
+        # Remove the database directory again if nothing could be downloaded.
+        if os.path.isdir(database_dir) and not os.listdir(database_dir):
+            os.rmdir(database_dir)
+    print("Done")
+
+
+def process_data():
+    print("Processing data...")
+    raw_dir = "datasets/raws"
+    processed_dir = "datasets/processed"
+
+    # Only the per-database sub-directories created by fetch_data() are processed.
+    ecg_dirs = [d for d in os.listdir(raw_dir)
+                if os.path.isdir(os.path.join(raw_dir, d))]
+
+    if not os.path.exists(processed_dir):
+        os.makedirs(processed_dir)
+
+    # Merge every record of a database into a single one-column CSV.
+    for ecg_name in ecg_dirs:
+        print("Processing {}".format(ecg_name))
+        processed_csv = os.path.join(processed_dir, ecg_name) + ".csv"
+        with open(processed_csv, 'w') as write_processed_file:
+            csvwriter = csv.writer(write_processed_file, delimiter=',')
+            record_dir = os.path.join(raw_dir, ecg_name)
+            for record in os.listdir(record_dir):
+                if record.endswith('.csv'):
+                    record_path = os.path.join(record_dir, record)
+
+                    with open(record_path) as read_raw_file:
+                        reader = csv.reader(read_raw_file)
+                        # Skip the two header rows written by rdsamp -v
+                        # (column names and units).
+                        next(reader)
+                        next(reader)
+                        # Keep only the first signal channel (column 1).
+                        for row in reader:
+                            csvwriter.writerow([row[1]])
+    print("Done")
+
+
+if __name__ == "__main__":
+    fetch_data()
+    process_data()
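
Once fetch_data() and process_data() have run (for example with "python fetch_data.py -dl mitdb"), each downloaded database is left as a single-column CSV under datasets/processed, holding the first signal channel of every record in physical units. A minimal sketch of reading one of these files back, assuming mitdb was among the downloaded databases (the path below is only an illustrative example):

    import csv

    # Load the one-column CSV written by process_data() into a list of floats.
    # "datasets/processed/mitdb.csv" is an assumed example path.
    with open("datasets/processed/mitdb.csv") as f:
        samples = [float(row[0]) for row in csv.reader(f) if row]

    print("Loaded {} samples".format(len(samples)))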