[f8af2c]: / functions.py

Download this file

100 lines (86 with data), 2.7 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
import numpy as np
import random
def quan_detector(dataset, label):
    """Measure how common the all-zero promoter is in ``dataset``.

    A row counts as the all-zero promoter when every one of its entries
    equals 0.

    Args:
        dataset: Sequence of numeric rows (plain lists or a 2-D NumPy
            array), one row per sample.
        label: Sequence parallel to ``dataset``; ``label[i][0] == 1`` marks
            sample ``i`` as positive, any other value as negative.

    Returns:
        A tuple ``(fraction, pos_count, neg_count)`` of floats:
        ``fraction`` is the share of rows that are all zero, and
        ``pos_count`` / ``neg_count`` are how many of those all-zero rows
        are labelled positive / negative.

    Raises:
        ValueError: If ``dataset`` is empty (the fraction is undefined).
    """
    total = len(dataset)
    if total == 0:
        raise ValueError("dataset must contain at least one sample")

    zero_count = 0.
    pos_count = 0.
    neg_count = 0.
    for i, row in enumerate(dataset):
        # Going through np.asarray keeps the comparison elementwise for
        # plain lists as well; ``list == list`` would collapse to one bool.
        if not np.all(np.asarray(row) == 0):
            continue
        zero_count += 1.
        if label[i][0] == 1:
            pos_count += 1.
        else:
            neg_count += 1.
    return zero_count / total, pos_count, neg_count
# Example: quan_detector(np.array([[0,0,0],[0,1,0],[0,2,0],[1,0,1]]), np.array([[1,0],[0,1],[0,1],[1,0]]))
def most_repeared_promoter(dataset, label):
    """Find the most frequent promoter (row) other than the all-zero one.

    Rows are identified by their comma-joined string form, so two rows are
    the same promoter when ``str()`` of every entry matches. The promoter
    whose string form is ``"0,0,...,0"`` is excluded from the ranking.

    Args:
        dataset: Sequence of equal-length rows, one row per sample.
        label: Sequence parallel to ``dataset``; ``label[i][0] == 1`` marks
            sample ``i`` as positive, any other value as negative.

    Returns:
        A tuple ``(idx, max_count, counts)`` where ``idx`` is the winning
        row as a list of strings, ``max_count`` is how many times it
        occurs, and ``counts`` is ``[total, positives, negatives]`` for
        that row. On ties the promoter encountered first while iterating
        the tally wins.

    Raises:
        ValueError: If ``dataset`` is empty or holds only all-zero rows.
    """
    if len(dataset) == 0:
        raise ValueError("dataset must contain at least one sample")

    # Key under which the all-zero promoter is stored in the tally.
    zero_key = ','.join(['0'] * len(dataset[0]))

    # key -> [total occurrences, positive occurrences, negative occurrences]
    dict_count = {}
    for i, row in enumerate(dataset):
        key = ','.join(map(str, row))
        counts = dict_count.setdefault(key, [0, 0, 0])
        counts[0] += 1
        if label[i][0] == 1:
            counts[1] += 1
        else:
            counts[2] += 1

    # The promoter without any non-zero entry is not a candidate.
    dict_count.pop(zero_key, None)
    if not dict_count:
        raise ValueError("dataset contains no promoter other than all zeros")

    best_key = max(dict_count, key=lambda k: dict_count[k][0])
    best_counts = dict_count[best_key]
    return best_key.split(','), best_counts[0], best_counts
###############################################################
######### Dataset generation ##############
###############################################################
def indx(lab):
    """Split sample positions into two groups by class id.

    Args:
        lab: Sequence of class ids, one per sample.

    Returns:
        A tuple ``(p, n)`` of lists: ``p`` holds the positions whose class
        id equals 0 (the positive / ALS samples) and ``n`` holds every
        other position (the negative / Non-ALS samples). Both lists are in
        ascending order.
    """
    p = []  # positive sample positions -- ALS
    n = []  # negative sample positions -- Non-ALS
    for i, class_id in enumerate(lab):
        if class_id == 0:
            p.append(i)
        else:
            n.append(i)
    return p, n
def dataset(X, Y, test_ratio):
    """Randomly split samples into a training set and a balanced test set.

    The class of each sample is ``argmax`` of its row in ``Y``; class 0 is
    treated as positive and everything else as negative (see ``indx``).
    The test set takes the same number of positive and negative samples,
    picked at random; all remaining samples form the training set. Both
    sets are shuffled.

    Args:
        X: Sample array that supports indexing with a list of integer
            positions (e.g. a NumPy array), one entry per sample.
        Y: 2-D label array with one row per sample, indexable the same way.
        test_ratio: Fraction of the samples targeted for the test set.

    Returns:
        A tuple ``(tr_X, tr_Y, te_X, te_Y)``.
    """
    lab = np.argmax(Y, axis=1)
    pos_s, neg_s = indx(lab)
    N = len(lab)

    # Round the requested test size down to a multiple of 5. Floor division
    # keeps this an integer on Python 3 too, where ``/`` would produce a
    # float and silently skip the rounding.
    N_te = int(N * test_ratio) // 5 * 5
    # Half of the test set per class; an odd N_te therefore yields N_te - 1
    # test samples, and a class with too few samples contributes fewer.
    per_class = N_te // 2

    random.shuffle(pos_s)
    random.shuffle(neg_s)
    te_idx = pos_s[:per_class] + neg_s[:per_class]
    # Everything not selected for testing is used for training.
    tr_idx = list(set(range(N)) - set(te_idx))
    random.shuffle(te_idx)
    random.shuffle(tr_idx)

    tr_X = X[tr_idx]
    tr_Y = Y[tr_idx]
    te_X = X[te_idx]
    te_Y = Y[te_idx]
    return tr_X, tr_Y, te_X, te_Y