Diff of /functions.py [000000] .. [f8af2c]

Switch to unified view

a b/functions.py
1
import numpy as np
2
import random
3
4
def quan_detector(dataset, label):
    """
    Measure how often the all-zero promoter occurs in ``dataset``.

    Parameters
    ----------
    dataset : sequence of rows
        Each row is an iterable of values (a list of lists or a 2-D numpy
        array both work).
    label : sequence
        Aligned with ``dataset``; a row counts as positive when
        ``label[i][0] == 1`` and as negative otherwise.

    Returns
    -------
    tuple
        ``(fraction, pos_count, neg_count)`` where ``fraction`` is the share
        of rows that are entirely zero, and ``pos_count`` / ``neg_count`` are
        the numbers (as floats) of those all-zero rows that are positive /
        negative. An empty dataset yields ``(0.0, 0.0, 0.0)``.
    """
    N = len(dataset)
    if N == 0:
        # Nothing to count; also avoids dividing by zero below.
        return 0., 0., 0.

    count = 0.
    pos_count = 0.
    neg_count = 0.
    for i, row in enumerate(dataset):
        # Test element by element: comparing two plain lists with ``==``
        # yields a single bool, which ``all`` cannot iterate over.
        if all(value == 0 for value in row):
            count += 1.
            if label[i][0] == 1:
                pos_count += 1.
            else:
                neg_count += 1.
    return count / N, pos_count, neg_count
23
#quan_detector([[0,0,0],[0,1,0],[0,2,0],[1,0,1]])
24
25
def most_repeared_promoter(dataset, label):
    """
    Find the most frequently repeated promoter, ignoring the all-zero one.

    Rows are compared through their comma-joined string form, so two rows are
    the same promoter when ``str`` of every element matches.

    Parameters
    ----------
    dataset : sequence of rows
        Each row is an iterable of values.
    label : sequence
        Aligned with ``dataset``; a row counts as positive when
        ``label[i][0] == 1`` and as negative otherwise.

    Returns
    -------
    tuple
        ``(idx, max_count, counts)`` where ``idx`` is the winning promoter as
        a list of strings, ``max_count`` is how many times it occurs, and
        ``counts`` is ``[total, positive, negative]`` for that promoter.
        When several promoters share the highest count, the last one in the
        dictionary's iteration order is returned.

    Raises
    ------
    ValueError
        If the dataset is empty or holds nothing but the all-zero promoter.
    """
    N = len(dataset)
    if N == 0:
        raise ValueError("dataset is empty")

    # Row width comes from the first row so a one-row dataset also works.
    n = len(dataset[0])
    # NOTE: the all-zero promoter is recognised by this exact string, so only
    # rows whose elements print as '0' (not e.g. '0.0') are excluded.
    zeros = ','.join(['0'] * n)

    dict_count = {}  # promoter string -> [total, positive, negative]
    for i, row in enumerate(dataset):
        str_prom = ','.join(map(str, row))
        counts = dict_count.setdefault(str_prom, [0, 0, 0])
        counts[0] += 1
        if label[i][0] == 1:
            counts[1] += 1
        else:
            counts[2] += 1

    # The all-zero promoter is not a candidate.
    dict_count.pop(zeros, None)
    if not dict_count:
        raise ValueError("dataset contains no non-zero promoter")

    # Plain max over the values works on Python 2 and 3 alike; wrapping
    # dict.values() in np.array does not give a 2-D array on Python 3.
    max_count = max(v[0] for v in dict_count.values())
    idx_temp = None
    for k, v in dict_count.items():
        if v[0] == max_count:
            idx_temp = k  # no break: the last matching key wins
    idx = idx_temp.split(',')
    return idx, max_count, dict_count[idx_temp]
54
55
###############################################################
56
#########          Dataset generation            ##############
57
###############################################################
58
def indx(lab):
    """
    Split sample positions by class label.

    Parameters
    ----------
    lab : sequence
        One class label per sample.

    Returns
    -------
    tuple of two lists
        ``(p, n)``: ``p`` holds the positions whose label equals 0 (the
        positive / ALS samples), ``n`` holds every other position (the
        negative / Non-ALS samples). Both lists are in ascending order.
    """
    p = []  # positive samples index -- ALS
    n = []  # negative samples index -- Non-ALS
    for i, value in enumerate(lab):
        if value == 0:
            p.append(i)
        else:
            n.append(i)
    return p, n
68
69
70
def dataset(X, Y, test_ratio):
    """
    Randomly split ``X`` / ``Y`` into training and class-balanced test parts.

    The test set size is ``int(N * test_ratio)`` rounded down to a multiple
    of 5. Half of that size is drawn from each class (fewer if a class does
    not have enough samples); every remaining sample goes to training. Both
    parts are shuffled with the ``random`` module.

    Parameters
    ----------
    X : indexable by a list of integer positions (e.g. a numpy array)
        Samples, one per row.
    Y : same indexing requirement as ``X``
        Per-sample label scores; the class is ``np.argmax(Y, axis=1)``
        (presumably one-hot labels -- confirm against the caller).
    test_ratio : float
        Fraction of the samples to reserve for testing.

    Returns
    -------
    tuple
        ``(tr_X, tr_Y, te_X, te_Y)``.
    """
    lab = np.argmax(Y, axis=1)
    pos_s, neg_s = indx(lab)

    N = len(lab)

    # Floor division keeps the test size an integer multiple of 5 on both
    # Python 2 and Python 3 ('/' is true division on Python 3).
    N_te = int(N * test_ratio) // 5 * 5  # number of test samples

    pos_s_te = int(N_te * 0.5)
    neg_s_te = int(N_te * 0.5)

    random.shuffle(pos_s)
    random.shuffle(neg_s)

    pos_idx_te = pos_s[:pos_s_te]
    neg_idx_te = neg_s[:neg_s_te]

    te_idx = pos_idx_te + neg_idx_te
    # Everything not picked for testing is used for training.
    tr_idx = list(set(range(N)) - set(te_idx))

    random.shuffle(te_idx)
    random.shuffle(tr_idx)

    tr_X = X[tr_idx]
    tr_Y = Y[tr_idx]

    te_X = X[te_idx]
    te_Y = Y[te_idx]
    return tr_X, tr_Y, te_X, te_Y