exseek/scripts/utils.py

from collections import namedtuple
import inspect

import numpy as np


GFFRecord = namedtuple('GFFRecord', [
    'seqname', 'source', 'feature', 'start', 'end',
    'score', 'strand', 'frame', 'attr'])

def read_gff(fin, zero_based=False):
    '''Read a GFF file

    Args:
        fin: file-like object
        zero_based: if True, convert start positions to 0-based by subtracting 1

    Yields:
        feature: GFFRecord object
    '''
    for lineno, line in enumerate(fin):
        if line.startswith('#'):
            continue
        c = line.strip().split('\t')
        if len(c) < 9:
            raise ValueError('less than 9 columns found in GFF file at line {}'.format(lineno + 1))
        c[3] = int(c[3])
        c[4] = int(c[4])
        if zero_based:
            c[3] -= 1
        # parse the attribute column (key=value pairs separated by ';') into a dict
        attrs = {}
        for a in c[8].split(';'):
            i = a.find('=')
            key = a[:i]
            val = a[(i + 1):]
            attrs[key] = val
        c[8] = attrs
        yield GFFRecord._make(c)
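
# Illustrative usage sketch (not part of the original module): read_gff accepts any
# file-like object, so an in-memory GFF line can be parsed directly. The helper
# name below is hypothetical.
def _example_read_gff():
    import io
    gff = io.StringIO('chr1\tensembl\texon\t11\t20\t.\t+\t.\tgene_id=ENSG1;transcript_id=ENST1\n')
    for record in read_gff(gff, zero_based=True):
        # with zero_based=True the start is converted from 11 to 10
        print(record.seqname, record.start, record.end, record.attr['gene_id'])
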
def search_dict(data, keys):
    '''Intersect given keys with a dict and return the subset

    Parameters:
        data: dict
        keys: list of keys to search

    Returns:
        dict containing only the requested keys that are present in data
    '''
    return {key: data[key] for key in keys if key in data}
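
# Illustrative usage sketch (not part of the original module): select a subset of
# configuration keys, silently skipping any that are missing.
def _example_search_dict():
    config = {'n_estimators': 100, 'max_depth': 3, 'random_state': 0}
    # returns {'n_estimators': 100, 'random_state': 0}; 'n_jobs' is absent and ignored
    print(search_dict(config, ['n_estimators', 'random_state', 'n_jobs']))
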
def get_feature_importances(estimator):
    '''Get feature importances from a fitted estimator

    Falls back to estimator-specific attributes when neither coef_ nor
    feature_importances_ is available.
    '''
    from sklearn.feature_selection import RFE
    from sklearn.neural_network import MLPClassifier
    from sklearn.svm import SVC

    if hasattr(estimator, 'coef_'):
        return np.ravel(np.abs(estimator.coef_))
    elif hasattr(estimator, 'feature_importances_'):
        return np.ravel(estimator.feature_importances_)
    elif isinstance(estimator, RFE):
        # convert RFE rankings (1 = best) to importances in [0, 1] (1 = best)
        ranking = estimator.ranking_.astype('float') - 1
        return np.ravel(1.0 - ranking/ranking.max())
    elif isinstance(estimator, MLPClassifier):
        # MLPClassifier has no per-feature importance; return zeros of the right length
        return np.zeros(estimator.coefs_[0].shape[0])
    elif isinstance(estimator, SVC):
        # kernel SVC has no per-feature importance; return zeros of the right length
        return np.zeros(estimator.support_vectors_.shape[1])
    else:
        raise ValueError('cannot get feature importances: the estimator has neither coef_ nor feature_importances_ and is not a supported type')
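
# Illustrative usage sketch (not part of the original module): a fitted linear model
# exposes coef_, so its absolute coefficients are returned as importances.
def _example_get_feature_importances():
    from sklearn.linear_model import LogisticRegression
    X = np.array([[0.0, 1.0], [1.0, 0.0], [0.0, 2.0], [2.0, 0.0]])
    y = np.array([0, 1, 0, 1])
    estimator = LogisticRegression().fit(X, y)
    # one non-negative value per feature
    print(get_feature_importances(estimator))
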
def function_has_arg(func, arg):
    '''Check whether a function accepts an argument with the given name'''
    # getfullargspec replaces the deprecated getargspec (removed in Python 3.11)
    return arg in inspect.getfullargspec(func).args
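
# Illustrative usage sketch (not part of the original module): check whether a
# callable accepts a keyword argument before passing it.
def _example_function_has_arg():
    def train(X, y, sample_weight=None):
        pass
    print(function_has_arg(train, 'sample_weight'))  # True
    print(function_has_arg(train, 'class_weight'))   # False
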
def python_args_to_r_args(args):
    '''Convert a dict of Python arguments to an R function argument string'''
    def to_r_arg(arg):
        # note: bool is checked before int because bool is a subclass of int
        if isinstance(arg, str):
            return "'" + arg + "'"
        elif isinstance(arg, bool):
            return 'TRUE' if arg else 'FALSE'
        elif isinstance(arg, int):
            return str(arg)
        elif isinstance(arg, float):
            return str(arg)
        else:
            raise TypeError('{}: unsupported type: {}'.format(arg, type(arg)))

    r_args = {}
    for name, arg in args.items():
        is_atom_type = False
        for atom_type in (str, int, bool, float):
            if isinstance(arg, atom_type):
                r_args[name] = to_r_arg(arg)
                is_atom_type = True
                break
        if not is_atom_type:
            if isinstance(arg, (tuple, list)):
                # convert sequences to R vectors, e.g. c(1,2,3)
                r_args[name] = 'c(' + ','.join(map(to_r_arg, arg)) + ')'
            else:
                raise TypeError('{}: unsupported type: {}'.format(arg, type(arg)))
    return ', '.join('%s=%s' % (k, v) for k, v in r_args.items())
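
# Illustrative usage sketch (not part of the original module): build an R argument
# string from a dict of Python values, e.g. for composing an Rscript call.
def _example_python_args_to_r_args():
    args = {'method': 'TMM', 'log': True, 'prior_count': 1, 'cutoffs': [0.1, 0.5]}
    # -> "method='TMM', log=TRUE, prior_count=1, cutoffs=c(0.1,0.5)"
    print(python_args_to_r_args(args))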