Diff of /data_load.py [000000] .. [857d1b]

Switch to unified view

a b/data_load.py
1
import os
2
import random
3
import numpy as np
4
import pandas as pd
5
6
class DataLoad(object):
    """Load every CSV file in a directory and serve fixed-length windows.

    All CSV files found directly inside ``data_path`` are concatenated row-wise
    into one table.  Columns are addressed by position: the first three columns
    (indices 0-2) are used as features and the column at index 6 is used as an
    integer class label.
    NOTE(review): the original code referenced a column named ``acc_x``, so the
    features are presumably accelerometer axes -- confirm against the dataset.
    """

    # Class-level defaults; real values are assigned per instance in __init__.
    _all_data = None
    _extract_data_size = 0
    _class_num = 0

    def __init__(self, data_path, time_step, class_num):
        """Read all CSV files under ``data_path`` into a single DataFrame.

        :param data_path: directory that contains the CSV files.
        :param time_step: number of consecutive rows in one extracted window.
        :param class_num: number of classes, i.e. the width of the one-hot labels.
        :raises FileExistsError: if ``data_path`` does not exist.  (The type is
            kept as-is for backward compatibility with existing callers.)
        """
        if not os.path.exists(data_path):
            raise FileExistsError('%s is not found' % (data_path))
        self._time_step = time_step
        self._extract_data_size = self._time_step
        self._class_num = class_num

        # os.listdir returns entries in arbitrary order; sort them so the row
        # layout of the concatenated table is reproducible between runs.
        self._data_file_list = [os.path.join(data_path, name)
                                for name in sorted(os.listdir(data_path))]

        # Read all csv files.  The check is made on the file extension rather
        # than on the whole path, so a directory name containing "csv" no
        # longer makes every file look like a CSV file.
        frames = [pd.read_csv(path, index_col=False)
                  for path in self._data_file_list
                  if os.path.splitext(path)[1].lower() == '.csv']

        # DataFrame.append was removed in pandas 2.0; a single concat is also
        # linear instead of re-copying the accumulated table for every file.
        self._all_data = pd.concat(frames) if frames else pd.DataFrame()

    def get_batch(self, batchsize, start_list=None):
        """Extract ``batchsize`` windows of ``time_step`` consecutive rows.

        :param batchsize: number of windows to return.
        :param start_list: optional sequence of row positions at which each
            window starts; its length must equal ``batchsize``.  When omitted,
            start positions are drawn uniformly at random from all positions
            that leave room for a full window.
        :return: tuple ``(x, y)`` where ``x`` has shape
            ``[batchsize, time_step, 3]`` (feature columns 0-2) and ``y`` has
            shape ``[batchsize, time_step, class_num]`` (one-hot labels built
            from column 6; label values are cast to integers).
        :raises KeyError: if ``start_list`` length differs from ``batchsize``
            (type kept for backward compatibility).
        :raises ValueError: if the data holds fewer rows than one window and
            random start positions were requested.
        :raises IndexError: if a start position does not leave room for a full
            window, or a label is outside ``class_num``.
        """
        window = self._extract_data_size
        data_size = len(self._all_data)
        last_start = data_size - window

        if start_list is None:
            if last_start < 0:
                raise ValueError(
                    'data has %d rows, fewer than one window of %d rows'
                    % (data_size, window))
            # randint is inclusive on both ends, so every valid start position
            # (including row 0) can be chosen; draw exactly one per window.
            start_pos = [random.randint(0, last_start) for _ in range(batchsize)]
        else:
            if len(start_list) != batchsize:
                raise KeyError(
                    'batchsize (%d) is not equal to start_list length (%d)!'
                    % (batchsize, len(start_list)))
            start_pos = start_list

        train_x = []
        label_y = []
        row_index = np.arange(window)
        for start in start_pos:
            stop = start + window
            if start < 0 or stop > data_size:
                raise IndexError(
                    'window [%d, %d) is outside the data range [0, %d)'
                    % (start, stop, data_size))

            rows = self._all_data.iloc[start:stop]
            train_x.append(rows.iloc[:, 0:3].values)

            # One-hot encode the whole window at once instead of issuing one
            # iloc lookup per time step.
            class_ids = rows.iloc[:, 6].values.astype(np.intp)
            one_hot = np.zeros((window, self._class_num), dtype=int)
            one_hot[row_index, class_ids] = 1
            label_y.append(one_hot)

        return np.array(train_x), np.array(label_y)

    def get_test_data(self):
        """Return the full table as feature and label arrays.

        x shape = [datasize, 3]  (columns 0-2)
        y shape = [datasize]     (column 6, raw label values, not one-hot)
        :return: tuple ``(x, y)``
        """
        x = np.array(self._all_data.iloc[:, 0:3].values)
        y = np.array(self._all_data.iloc[:, 6].values)
        return x, y
63
64
65
66
if __name__ == '__main__':
    # Manual smoke check: load the training directory, draw one random batch
    # of 50 windows (150 rows each, 11 classes) and print both array shapes.
    loader = DataLoad('./dataset/train/', time_step=150, class_num=11)
    batch_x, batch_y = loader.get_batch(50)
    for batch in (batch_x, batch_y):
        print(batch.shape)