[4807fa]: / dl / utils / outlier.py

Download this file

62 lines (55 with data), 2.0 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
"""Functions for removing outliers and normalizing matrices."""
import numpy as np
def remove_boxplot_outlier(array, whis=1.5, only_positive=True):
    """Clip values that fall outside the box-plot whiskers.

    The whisker bounds follow the usual box-plot rule:
    ``[Q1 - whis * IQR, Q3 + whis * IQR]`` where ``IQR = Q3 - Q1``.
    Outliers are not dropped; they are clipped to the nearest bound, so the
    result has the same shape as the input.

    Args:
        array: array-like of numbers (lists are accepted).
        whis: whisker length as a multiple of the interquartile range.
        only_positive: if True, the quartiles are estimated from the strictly
            positive entries only; otherwise from all entries.

    Returns:
        np.ndarray: a new array with every element clipped to the bounds.
        If there is nothing to estimate the quartiles from (empty input, or
        no positive entry while ``only_positive`` is True), an unchanged copy
        is returned.

    NOTE(review): the clipping is applied to *every* element, including the
    non-positive ones excluded from the quartile estimate, so values below
    the lower bound (e.g. zeros) are raised to it. Confirm this is intended.
    """
    array = np.asarray(array)  # lets plain lists work with the boolean mask
    sample = array[array > 0] if only_positive else array
    if sample.size == 0:
        # No data to derive quartiles from, hence nothing to call an outlier.
        return array.copy()
    q1, q3 = np.percentile(sample, [25, 75])
    iqr = q3 - q1
    return np.clip(array, q1 - whis * iqr, q3 + whis * iqr)
def log2_transformation(mat, forced=False, threshold=50):
    """Apply the shifted log2 transform ``log2(mat - mat.min() + 1)``.

    Args:
        mat: array-like of numbers; it is copied into a new np.ndarray, so
            the caller's object is never modified.
        forced: if True, transform unconditionally; otherwise the transform
            is applied only when the spread test below says it is needed.
        threshold: float, default 50; the transform is applied when
            ``(max - min) / interquartile range > threshold``, or when the
            interquartile range is 0 while ``max > min``.

    Returns:
        np.ndarray: the transformed copy, or the untransformed copy when the
        transform is not needed. Empty input is returned as an empty array.
    """
    mat = np.array(mat)  # copy; also covers the case where mat is a list
    if mat.size == 0:
        # min()/percentile are undefined on empty data; nothing to transform.
        return mat

    lowest = mat.min()
    if not forced:
        q1, q3 = np.percentile(mat, [25, 75])
        iqr = q3 - q1
        spread = mat.max() - lowest
        if iqr == 0:
            # Avoid dividing by zero: a zero IQR with a non-zero range means
            # the data are dominated by a few extreme values.
            needs_transform = spread > 0
        else:
            needs_transform = spread / iqr > threshold
        if not needs_transform:
            return mat

    # Shift so the smallest value maps to log2(1) == 0.
    return np.log2(mat - lowest + 1)
def normalization(mat, transform=True, forced=False, threshold=50, rm_outlier=True, whis=1.5,
                  only_positive=True, max_val=1, diagonal=1, symmetric=True):
    """Normalize an interaction/similarity matrix.

    The steps run in this order, each one optional:
    log2 transform -> outlier clipping -> scale by max -> set diagonal ->
    symmetrize. The input is copied first, so the caller's matrix is never
    modified.

    Args:
        mat: 2-D array-like of numbers (lists are accepted).
        transform: if True, call log2_transformation(mat, forced, threshold).
        forced: passed through to log2_transformation.
        threshold: passed through to log2_transformation.
        rm_outlier: if True, call remove_boxplot_outlier(mat, whis, only_positive).
        whis: passed through to remove_boxplot_outlier.
        only_positive: passed through to remove_boxplot_outlier.
        max_val: if max_val == 1, divide the matrix by its maximum; the
            division is skipped when the maximum is 0 to avoid NaNs.
        diagonal: if diagonal == 1, set the main-diagonal elements to 1.
        symmetric: if True, replace mat with (mat + mat.T) / 2.

    Returns:
        np.ndarray: the normalized matrix.
    """
    # Work on a private copy: the diagonal step below writes in place.
    mat = np.array(mat)

    if transform:
        mat = log2_transformation(mat, forced, threshold)
    if rm_outlier:
        mat = remove_boxplot_outlier(mat, whis, only_positive)
    if max_val == 1:
        peak = mat.max()
        if peak != 0:  # an all-zero matrix would otherwise become all-NaN
            mat = mat / peak
    if diagonal == 1:
        np.fill_diagonal(mat, 1)
    if symmetric:
        mat = (mat + mat.T) / 2
    return mat