a b/dataset/read_raw_dataset.py
1
import numpy as np
2
from os import listdir
3
from os.path import isfile, join
4
5
def read_raw_dataset(base_dir=""):
    """
    Load every example file of each set into one numpy array per set.

    Returns a dictionary `raw_dataset` whose keys are the set names
    A, B, C, D and E. Each value is a numpy array built by stacking the
    examples of that set along axis 0. When every file holds a single
    1-D time series, the shape is (m, n) with

    m = no. of training examples (files) in the set
    n = no. of data points in each time series

    Parameters:
        base_dir: directory that contains the set directories Z, O, N, F
            and S. The default "" resolves them relative to the current
            working directory.

    Notes:
        - One file corresponds to one training example; files are parsed
          with `np.loadtxt`.
        - Files are read in sorted name order so the row order is
          reproducible (`listdir` itself returns an arbitrary order).
        - Only regular files are read; sub-directories are skipped.
        - A set whose directory contains no files gets no key in the result.

    Raises:
        OSError: if a set directory is missing or unreadable (from `listdir`).
        ValueError: if the examples of one set do not share the same shape,
            or a file cannot be parsed as numbers.
    """

    # Directory names are saved as Z, O, N, F, S
    # and the corresponding set names are exposed as A, B, C, D, E.
    set_to_dir = {
        'A': 'Z',
        'B': 'O',
        'C': 'N',
        'D': 'F',
        'E': 'S',
    }

    raw_dataset = {}

    for set_name, dir_name in set_to_dir.items():
        set_dir = join(base_dir, dir_name)

        # Sort so that the row order does not depend on the file system.
        file_names = sorted(
            f for f in listdir(set_dir) if isfile(join(set_dir, f))
        )

        # np.stack rejects an empty list; leave the key out for empty sets.
        if not file_names:
            continue

        # Collect all examples first and stack once: appending to a numpy
        # array inside the loop would copy the whole array for every file.
        examples = [np.loadtxt(join(set_dir, f)) for f in file_names]
        raw_dataset[set_name] = np.stack(examples)

    return raw_dataset