|
a |
|
b/tests/test_fingerprints.py |
|
|
1 |
import os |
|
|
2 |
import sys |
|
|
3 |
from itertools import combinations |
|
|
4 |
|
|
|
5 |
import numpy as np |
|
|
6 |
from scipy.sparse import vstack as sparse_vstack |
|
|
7 |
from numpy.testing import (assert_array_equal, |
|
|
8 |
assert_array_almost_equal, |
|
|
9 |
assert_almost_equal) |
|
|
10 |
|
|
|
11 |
import pytest |
|
|
12 |
|
|
|
13 |
import oddt |
|
|
14 |
from oddt.fingerprints import (InteractionFingerprint, |
|
|
15 |
SimpleInteractionFingerprint, |
|
|
16 |
ECFP, |
|
|
17 |
_ECFP_atom_repr, |
|
|
18 |
SPLIF, |
|
|
19 |
similarity_SPLIF, |
|
|
20 |
PLEC, |
|
|
21 |
fold, |
|
|
22 |
MIN_HASH_VALUE, |
|
|
23 |
MAX_HASH_VALUE, |
|
|
24 |
sparse_to_dense, |
|
|
25 |
sparse_to_csr_matrix, |
|
|
26 |
csr_matrix_to_sparse, |
|
|
27 |
dense_to_sparse, |
|
|
28 |
get_molecular_shingles, |
|
|
29 |
hash_fnv1a_python, |
|
|
30 |
dice, |
|
|
31 |
tanimoto) |
|
|
32 |
from .utils import shuffle_mol |
|
|
33 |
|
|
|
34 |
|
|
|
35 |
# Directory containing this test module; fixture paths below are joined to it.
test_data_dir = os.path.dirname(os.path.abspath(__file__))

# Shared receptor fixture: the first entry read from the 10gs pocket PDB
# file, flagged as a protein, with addh(only_polar=True) applied.
protein = next(oddt.toolkit.readfile(
    'pdb',
    os.path.join(test_data_dir, 'data/pdbbind/10gs/10gs_pocket.pdb'),
))
protein.protein = True
protein.addh(only_polar=True)

# Shared ligand fixture: the first entry read from the 10gs ligand SDF file,
# prepared with the same addh(only_polar=True) call as the receptor.
ligand = next(oddt.toolkit.readfile(
    'sdf',
    os.path.join(test_data_dir, 'data/pdbbind/10gs/10gs_ligand.sdf'),
))
ligand.addh(only_polar=True)
|
|
45 |
|
|
|
46 |
|
|
|
47 |
def test_folding():
    """Check `fold` at both ends of the hash range and across the range."""
    # Target sizes exercised at both bounds (same values, same order as the
    # explicit assertions they replace).
    target_sizes = (1024, 1234567890, MAX_HASH_VALUE / 2, MAX_HASH_VALUE - 1)

    # Upper bound: the largest hash must land in the last bucket.
    for target_size in target_sizes:
        assert_array_equal(fold([MAX_HASH_VALUE], target_size),
                           [target_size - 1])

    # Lower bound: the smallest hash must land in bucket 0.
    for target_size in target_sizes:
        assert_array_equal(fold([MIN_HASH_VALUE], target_size), [0])

    # Range check: folding to the full range only shifts values down by one.
    hashes = np.arange(1, MAX_HASH_VALUE, 1e6, dtype=int)
    assert_array_equal(fold(hashes, MAX_HASH_VALUE), hashes - 1)
|
|
65 |
|
|
|
66 |
# NOTE: `sys.version_info` is a 5-tuple such as (3, 7, 4, 'final', 0), which
# compares greater than (3, 7); the previous `> (3, 7)` condition therefore
# skipped every 3.7.x interpreter as well. The tuple hash algorithm changed
# in Python 3.8, so that is the first version that has to be skipped.
@pytest.mark.skipif(sys.version_info >= (3, 8), reason="Only testable with old Python Hash implementation")
def test_hashing_function():
    """Verify the implementation of Python 2.4-3.7 hash function in Python"""
    sample_list = list(range(-10, 10))
    # add nested structure: each append embeds a snapshot of the list so far,
    # so the second tuple also contains the first one
    sample_list.append(tuple(sample_list))
    sample_list.append(tuple(sample_list))
    for sample_tuple in combinations(sample_list, r=5):
        python_hash = hash(sample_tuple)
        custom_hash = hash_fnv1a_python(sample_tuple)
        assert python_hash == custom_hash
|
|
77 |
|
|
|
78 |
|
|
|
79 |
def test_sparse_densify():
    """FP densify: sparse, dense and CSR representations must round-trip."""
    sparse_fp = [0, 33, 49, 53, 107, 156, 161, 203, 215, 230, 251, 269, 299,
                 323, 331, 376, 389, 410, 427, 430, 450, 484, 538, 592, 593,
                 636, 646, 658, 698, 699, 702, 741, 753, 807, 850, 861, 882,
                 915, 915, 915, 969, 969, 1023]

    # count vectors: repeated indices (915, 969) must survive the round trip
    dense = sparse_to_dense(sparse_fp, size=1024, count_bits=True)
    csr = sparse_to_csr_matrix(sparse_fp, size=1024, count_bits=True)
    assert_array_equal(dense.reshape(1, -1), csr.toarray())
    resparsed = dense_to_sparse(dense)
    resparsed_csr = csr_matrix_to_sparse(csr)
    assert_array_equal(sparse_fp, resparsed)
    assert_array_equal(sparse_fp, resparsed_csr)

    # bool vectors: duplicates collapse, so compare against the unique indices
    dense = sparse_to_dense(sparse_fp, size=1024, count_bits=False)
    csr = sparse_to_csr_matrix(sparse_fp, size=1024, count_bits=False)
    assert_array_equal(dense.reshape(1, -1), csr.toarray())
    resparsed = dense_to_sparse(dense)
    resparsed_csr = csr_matrix_to_sparse(csr)
    assert_array_equal(np.unique(sparse_fp), resparsed)
    assert_array_equal(np.unique(sparse_fp), resparsed_csr)

    # test stacking
    np.random.seed(0)
    sparse_fps = np.random.randint(0, 1024, size=(20, 100))
    dense = np.vstack([sparse_to_dense(fp, size=1024) for fp in sparse_fps])
    # scipy.sparse.vstack documents `blocks` as a sequence; pass a list
    # instead of a generator, which newer scipy versions fail to iterate.
    csr = sparse_vstack([sparse_to_csr_matrix(fp, size=1024)
                         for fp in sparse_fps])
    assert_array_equal(dense, csr.toarray())

    # test exceptions: a plain ndarray is not accepted as CSR input
    with pytest.raises(ValueError):
        csr_matrix_to_sparse(np.array([1, 2, 3]))
|
|
114 |
|
|
|
115 |
|
|
|
116 |
def test_InteractionFingerprint():
    """Compare InteractionFingerprint for the 10gs complex with stored values.

    The expected vector (552 values) is written as 24 rows of 23 values.
    The first 13 rows are the same for both toolkits; the remaining 11 rows
    differ only in where the final ``1, 0, 2`` group is located.
    """
    zero_row = [0] * 23

    # Rows shared by both toolkit backends.
    common_rows = [
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0],
        zero_row,
        zero_row,
        zero_row,
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
        zero_row,
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0],
        zero_row,
        zero_row,
        [1, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    ]

    # Backend-specific tail (11 rows each).
    if oddt.toolkit.backend == 'ob':
        backend_rows = (
            [zero_row] * 9
            + [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0]]
            + [zero_row]
        )
    else:
        backend_rows = (
            [zero_row] * 4
            + [[0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
            + [zero_row] * 6
        )

    # Flatten the rows back into the single flat vector that is compared.
    expected = [value
                for row in common_rows + backend_rows
                for value in row]
    assert_array_equal(expected, InteractionFingerprint(ligand, protein))
|
|
169 |
|
|
|
170 |
|
|
|
171 |
def test_SimpleInteractionFingerprint():
    """Compare SimpleInteractionFingerprint for 10gs with stored values."""
    computed = SimpleInteractionFingerprint(ligand, protein)
    expected = [
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0,
    ]
    assert_array_equal(expected, computed)
|
|
182 |
|
|
|
183 |
|
|
|
184 |
def test_IFP_SIFP_Folding_cum_sum():
    """Check that both interaction fingerprints sum to the same total."""
    detailed = InteractionFingerprint(ligand, protein)
    simple = SimpleInteractionFingerprint(ligand, protein)
    detailed_total = np.sum(detailed, axis=0)
    simple_total = np.sum(simple, axis=0)
    assert_array_equal(detailed_total, simple_total)
|
|
189 |
|
|
|
190 |
|
|
|
191 |
def test_similarity():
    """Dice and Tanimoto similarity of SimpleInteractionFingerprint poses."""
    ligands_path = os.path.join(test_data_dir,
                                'data/dude/xiap/actives_docked.sdf')
    receptor_path = os.path.join(test_data_dir,
                                 'data/dude/xiap/receptor_rdkit.pdb')

    # Keep only the entries titled '312335' and prepare them with addh.
    all_mols = list(oddt.toolkit.readfile('sdf', ligands_path))
    mols = [mol for mol in all_mols if mol.title == '312335']
    for mol in mols:
        mol.addh(only_polar=True)

    receptor = next(oddt.toolkit.readfile('pdb', receptor_path))
    receptor.protein = True
    receptor.addh(only_polar=True)

    # The first kept entry is the reference every other one is compared to.
    ref = SimpleInteractionFingerprint(mols[0], receptor)

    dice_scores = []
    for mol in mols[1:]:
        dice_scores.append(
            dice(ref, SimpleInteractionFingerprint(mol, receptor)))
    expected_dice = np.array([0.742857, 0.645161, 0.727273, 0.571429,
                              0.727273, 0.588235, 0.75, 0.551724,
                              0.551724, 0.6875, 0.514286, 0.6875,
                              0.592593, 0.647059, 0.736842, 0.62069,
                              0.545455, 0.533333, 0.606061])
    assert_array_almost_equal(dice_scores, expected_dice)

    tanimoto_scores = []
    for mol in mols[1:]:
        tanimoto_scores.append(
            tanimoto(ref, SimpleInteractionFingerprint(mol, receptor)))
    expected_tanimoto = np.array([0.636364, 0.5, 0.666667, 0.384615, 0.666667,
                                  0.545455, 0.666667, 0.5, 0.363636, 0.666667,
                                  0.555556, 0.555556, 0.625, 0.6, 0.727273,
                                  0.555556, 0.5, 0.4, 0.363636])
    assert_array_almost_equal(tanimoto_scores, expected_tanimoto)
|
|
219 |
|
|
|
220 |
|
|
|
221 |
def test_sparse_similarity():
    """Sparse and dense ECFP inputs must give the same similarity values."""
    smiles_a = "CC1=C(C(=CC=C1)C)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4"
    smiles_b = "CC1=C(C(=CC=C1)O)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4"
    mol_a = oddt.toolkit.readstring("smi", smiles_a)
    mol_b = oddt.toolkit.readstring("smi", smiles_b)

    dense_a = ECFP(mol_a, depth=8, size=4096, sparse=False)
    dense_b = ECFP(mol_b, depth=8, size=4096, sparse=False)

    sparse_a = ECFP(mol_a, depth=8, size=4096, sparse=True)
    sparse_b = ECFP(mol_b, depth=8, size=4096, sparse=True)

    # Dice: sparse vs dense agreement, then empty / all-zero inputs give 0.
    dice_sparse = dice(sparse_a, sparse_b, sparse=True)
    dice_dense = dice(dense_a, dense_b)
    assert_almost_equal(dice_sparse, dice_dense)
    assert dice([], [], sparse=True) == 0.
    assert dice(np.zeros(10), np.zeros(10), sparse=False) == 0.

    # Tanimoto: the same three checks.
    tanimoto_sparse = tanimoto(sparse_a, sparse_b, sparse=True)
    tanimoto_dense = tanimoto(dense_a, dense_b)
    assert_almost_equal(tanimoto_sparse, tanimoto_dense)
    assert tanimoto([], [], sparse=True) == 0.
    assert tanimoto(np.zeros(10), np.zeros(10), sparse=False) == 0.
|
|
240 |
|
|
|
241 |
|
|
|
242 |
def test_ecfp_repr():
    """Pin the exact per-atom ECFP representation to detect changes."""
    mol = oddt.toolkit.readstring(
        "smi", "CC1=C(C(=CC=C1)C)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4")

    # One 7-tuple per atom, in atom order.
    expected = [
        (6, 0, 1, 3, 0, 0, 0), (6, 0, 3, 0, 0, 1, 1),
        (6, 0, 3, 0, 0, 1, 1), (6, 0, 3, 0, 0, 1, 1),
        (6, 0, 2, 1, 0, 1, 1), (6, 0, 2, 1, 0, 1, 1),
        (6, 0, 2, 1, 0, 1, 1), (6, 0, 1, 3, 0, 0, 0),
        (7, 0, 2, 1, 0, 0, 0), (6, 0, 3, 0, 0, 0, 0),
        (8, 0, 1, 0, 0, 0, 0), (6, 0, 2, 2, 0, 0, 0),
        (7, 0, 3, 0, 0, 1, 0), (6, 0, 2, 2, 0, 1, 0),
        (6, 0, 2, 2, 0, 1, 0), (7, 0, 3, 0, 0, 1, 0),
        (6, 0, 2, 2, 0, 1, 0), (6, 0, 2, 2, 0, 1, 0),
        (6, 0, 2, 2, 0, 0, 0), (6, 0, 3, 0, 0, 0, 0),
        (8, 0, 1, 0, 0, 0, 0), (7, 0, 3, 0, 0, 1, 0),
        (6, 0, 2, 2, 0, 1, 0), (6, 0, 2, 2, 0, 1, 0),
        (6, 0, 3, 0, 0, 1, 1), (6, 0, 3, 0, 0, 1, 1),
        (6, 0, 2, 2, 0, 1, 0), (6, 0, 2, 1, 0, 1, 1),
        (6, 0, 2, 1, 0, 1, 1), (16, 0, 2, 0, 0, 1, 1),
    ]

    atom_count = len(mol.atoms)
    computed = [_ECFP_atom_repr(mol, idx) for idx in range(atom_count)]
    assert_array_equal(computed, expected)
|
|
256 |
|
|
|
257 |
|
|
|
258 |
def test_ecfp():
    """ECFP bits and similarities must not depend on explicit hydrogens."""
    first = oddt.toolkit.readstring(
        "smi", "CC1=C(C(=CC=C1)C)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4")
    second = oddt.toolkit.readstring(
        "smi", "CC1=C(C(=CC=C1)O)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4")

    # Expected on-bit positions for the first molecule.
    first_bits = [
        2, 100, 176, 185, 200, 203, 359, 382, 447, 509, 518, 550, 572, 583,
        598, 606, 607, 684, 818, 821, 832, 861, 960, 992, 1006, 1019, 1042,
        1050, 1059, 1103, 1175, 1281, 1315, 1377, 1431, 1470, 1479, 1512,
        1577, 1588, 1598, 1620, 1633, 1647, 1663, 1723, 1749, 1751, 1775,
        1781, 1821, 1837, 1899, 1963, 1969, 1986, 2013, 2253, 2343, 2355,
        2368, 2435, 2547, 2654, 2657, 2702, 2722, 2725, 2803, 2816, 2853,
        2870, 2920, 2992, 3028, 3056, 3074, 3103, 3190, 3203, 3277, 3321,
        3362, 3377, 3383, 3401, 3512, 3546, 3552, 3585, 3593, 3617, 3674,
        3759, 3784, 3790, 3832, 3895, 3937, 3956, 3974, 4007, 4033,
    ]

    # Expected on-bit positions for the second molecule.
    second_bits = [
        43, 100, 176, 200, 203, 231, 382, 396, 447, 490, 518, 583, 606,
        607, 650, 818, 821, 832, 840, 861, 907, 950, 960, 992, 1006, 1013,
        1019, 1042, 1050, 1059, 1103, 1104, 1112, 1175, 1281, 1293, 1315,
        1377, 1431, 1470, 1512, 1543, 1577, 1588, 1598, 1633, 1647, 1663,
        1723, 1749, 1751, 1757, 1759, 1775, 1781, 1821, 1837, 1880, 1963,
        1969, 1986, 2253, 2355, 2368, 2435, 2544, 2547, 2654, 2702, 2722,
        2725, 2726, 2799, 2816, 2853, 2870, 2920, 2992, 3028, 3074, 3190,
        3203, 3277, 3290, 3333, 3362, 3383, 3401, 3512, 3546, 3552, 3585,
        3593, 3617, 3640, 3660, 3674, 3759, 3784, 3790, 3805, 3832, 3856,
        3895, 3924, 3956, 3974, 3992, 4007, 4033,
    ]

    def check_fingerprints():
        # Recompute both fingerprints from the current state of the
        # molecules and compare them with the stored bits and similarities.
        first_fp = ECFP(first, depth=8, size=4096, sparse=False)
        second_fp = ECFP(second, depth=8, size=4096, sparse=False)

        assert_array_equal(first_bits, np.where(first_fp)[0])
        assert_array_equal(second_bits, np.where(second_fp)[0])

        assert_almost_equal(dice(first_fp, second_fp), 0.69999999)
        assert_almost_equal(tanimoto(first_fp, second_fp), 0.63846153)

    # Molecules exactly as parsed from SMILES.
    check_fingerprints()

    # Adding Hs should not change anything.
    first.addh()
    second.addh()
    check_fingerprints()

    # Removing Hs should not change anything.
    first.removeh()
    second.removeh()
    check_fingerprints()
|
|
318 |
|
|
|
319 |
|
|
|
320 |
def test_fcfp():
    """FCFP fingerprints (ECFP with ``use_pharm_features=True``).

    Checks the on-bit positions and the Dice/Tanimoto similarity of two
    molecules, first as parsed from SMILES and again after hydrogens are
    added to both.
    """
    mol1 = oddt.toolkit.readstring("smi", "CC1=C(C(=CC=C1)C)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4")
    mol2 = oddt.toolkit.readstring("smi", "CC1=C(C(=CC=C1)O)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4")

    mol1_fp = ECFP(mol1, depth=8, size=4096,
                   sparse=False, use_pharm_features=True)
    mol2_fp = ECFP(mol2, depth=8, size=4096,
                   sparse=False, use_pharm_features=True)

    ref1 = [46, 111, 305, 310, 362, 384, 409, 451, 467, 548, 572, 595, 607,
            608, 620, 659, 691, 699, 724, 743, 752, 842, 926, 935, 974, 1037,
            1072, 1094, 1135, 1143, 1161, 1172, 1313, 1325, 1368, 1399, 1461,
            1486, 1488, 1492, 1603, 1619, 1648, 1665, 1666, 1838, 1887, 1900,
            1948, 1961, 1972, 1975, 1996, 2000, 2052, 2085, 2094, 2174, 2232,
            2236, 2368, 2382, 2383, 2402, 2483, 2492, 2527, 2593, 2616, 2706,
            2789, 2899, 2922, 2945, 2966, 3102, 3117, 3176, 3189, 3215, 3225,
            3297, 3326, 3349, 3373, 3513, 3525, 3535, 3601, 3619, 3780, 3820,
            3897, 3919, 3976, 3981, 4050, 4079, 4091]

    ref2 = [46, 111, 143, 172, 259, 305, 362, 409, 451, 467, 507, 518, 548,
            583, 595, 607, 608, 620, 639, 691, 693, 724, 752, 784, 825, 842,
            926, 1037, 1087, 1094, 1098, 1135, 1143, 1161, 1172, 1286, 1325,
            1368, 1371, 1395, 1399, 1461, 1486, 1488, 1492, 1565, 1619, 1648,
            1655, 1665, 1887, 1890, 1900, 1948, 1961, 1968, 1972, 1975, 1976,
            1996, 2000, 2007, 2094, 2125, 2174, 2232, 2236, 2368, 2382, 2383,
            2483, 2492, 2571, 2593, 2606, 2638, 2706, 2789, 2922, 2945, 2966,
            2986, 3030, 3100, 3102, 3117, 3227, 3326, 3350, 3373, 3406, 3419,
            3535, 3577, 3619, 3697, 3742, 3820, 3839, 3919, 3981, 4043, 4050,
            4079, 4091]

    assert_array_equal(ref1, np.where(mol1_fp)[0])
    assert_array_equal(ref2, np.where(mol2_fp)[0])

    assert_almost_equal(dice(mol1_fp, mol2_fp), 0.64074074)
    assert_almost_equal(tanimoto(mol1_fp, mol2_fp), 0.5)

    # adding Hs should not change anything
    mol1.addh()
    mol2.addh()

    # Recompute the fingerprints from the hydrogenated molecules. Without
    # this the assertions below would only re-check the arrays computed
    # before addh() and could never detect a hydrogen-related regression.
    mol1_fp = ECFP(mol1, depth=8, size=4096,
                   sparse=False, use_pharm_features=True)
    mol2_fp = ECFP(mol2, depth=8, size=4096,
                   sparse=False, use_pharm_features=True)

    assert_array_equal(ref1, np.where(mol1_fp)[0])
    assert_array_equal(ref2, np.where(mol2_fp)[0])

    assert_almost_equal(dice(mol1_fp, mol2_fp), 0.64074074)
    assert_almost_equal(tanimoto(mol1_fp, mol2_fp), 0.5)
|
|
366 |
|
|
|
367 |
|
|
|
368 |
def test_ecfp_invaraiants():
    """ECFP must stay the same after the molecule is shuffled repeatedly."""
    sildenafil = oddt.toolkit.readstring(
        "smi",
        "CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12")

    # Fingerprint of the molecule as parsed; every shuffled copy must match.
    baseline = ECFP(sildenafil, depth=4, size=4096, sparse=True)

    for _ in range(10):
        # Each round shuffles the result of the previous round.
        sildenafil = shuffle_mol(sildenafil)
        reshuffled = ECFP(sildenafil, depth=4, size=4096, sparse=True)
        assert_array_equal(baseline, reshuffled)
|
|
378 |
|
|
|
379 |
|
|
|
380 |
def test_splif():
    """Compare SPLIF hashes and coordinate shapes with stored values."""
    ligands_path = os.path.join(test_data_dir,
                                'data/dude/xiap/actives_docked.sdf')
    receptor_path = os.path.join(test_data_dir,
                                 'data/dude/xiap/receptor_rdkit.pdb')

    # Keep only the entries titled '312335' and prepare them with addh.
    all_mols = list(oddt.toolkit.readfile('sdf', ligands_path))
    mols = [mol for mol in all_mols if mol.title == '312335']
    for mol in mols:
        mol.addh(only_polar=True)

    receptor = next(oddt.toolkit.readfile('pdb', receptor_path))
    receptor.protein = True
    receptor.addh(only_polar=True)

    result = SPLIF(mols[0], receptor)

    # Stored hash values for the first kept entry (172 values).
    expected_hashes = [
        6, 38, 49, 53, 53, 53, 70, 70, 81, 81, 81, 81, 165, 216, 219,
        249, 330, 330, 333, 377, 380, 396, 396, 396, 423, 423, 479,
        479, 498, 498, 498, 570, 592, 625, 638, 768, 768, 817, 818,
        818, 818, 818, 858, 884, 888, 907, 930, 934, 935, 971, 1023,
        1041, 1115, 1142, 1184, 1184, 1252, 1263, 1269, 1275, 1275,
        1275, 1315, 1315, 1315, 1337, 1337, 1344, 1351, 1396, 1435,
        1465, 1502, 1502, 1502, 1502, 1569, 1569, 1569, 1569, 1569,
        1569, 1569, 1569, 1640, 1645, 1660, 1660, 1697, 1697, 1716,
        1746, 1756, 1778, 1901, 1937, 1997, 2000, 2000, 2000, 2007,
        2007, 2020, 2070, 2195, 2274, 2294, 2319, 2415, 2417, 2509,
        2528, 2578, 2578, 2584, 2590, 2590, 2624, 2636, 2678, 2678,
        2678, 2678, 2678, 2776, 2776, 2789, 2862, 2862, 2894, 2894,
        2894, 2923, 2923, 3058, 3073, 3073, 3073, 3073, 3137, 3159,
        3159, 3159, 3186, 3218, 3218, 3279, 3279, 3281, 3338, 3358,
        3360, 3368, 3387, 3609, 3636, 3636, 3713, 3713, 3716, 3716,
        3748, 3767, 3769, 3854, 3871, 3912, 3968, 3986, 3994, 3994,
        4069,
    ]

    hashes = result['hash']
    assert hashes.shape == (172,)
    assert_array_equal(result['ligand_coords'].shape, (172, 7, 3))
    assert_array_equal(result['protein_coords'].shape, (172, 7, 3))
    assert_array_equal(expected_hashes, hashes)
|
|
413 |
|
|
|
414 |
|
|
|
415 |
def test_splif_similarity():
    """SPLIF similarity must match stored values and be symmetric."""
    ligands_path = os.path.join(test_data_dir,
                                'data/dude/xiap/actives_docked.sdf')
    receptor_path = os.path.join(test_data_dir,
                                 'data/dude/xiap/receptor_rdkit.pdb')

    # Keep only the entries titled '312335' and prepare them with addh.
    all_mols = list(oddt.toolkit.readfile('sdf', ligands_path))
    mols = [mol for mol in all_mols if mol.title == '312335']
    for mol in mols:
        mol.addh(only_polar=True)

    receptor = next(oddt.toolkit.readfile('pdb', receptor_path))
    receptor.protein = True
    receptor.addh(only_polar=True)

    # The reference is computed separately from the per-entry list, so the
    # first score compares two fingerprints of the same entry (expected 1.0).
    ref = SPLIF(mols[0], receptor)
    splif_fps = [SPLIF(mol, receptor) for mol in mols]

    scores = []
    for fingerprint in splif_fps:
        scores.append(similarity_SPLIF(ref, fingerprint))
    expected_scores = np.array([1.000, 0.779, 0.660, 0.805, 0.630,
                                0.802, 0.366, 0.817, 0.378, 0.553,
                                0.732, 0.705, 0.856, 0.797, 0.502,
                                0.418, 0.653, 0.436, 0.708, 0.688])

    assert_array_almost_equal(scores, expected_scores, decimal=3)

    # check if similarity is symmetric
    for left, right in combinations(splif_fps, 2):
        forward = similarity_SPLIF(left, right)
        backward = similarity_SPLIF(right, left)
        assert forward == backward
|
|
438 |
|
|
|
439 |
|
|
|
440 |
def test_plec():
    """PLEC must match stored values with and without explicit hydrogens."""
    ligands_path = os.path.join(test_data_dir,
                                'data/dude/xiap/actives_docked.sdf')
    receptor_path = os.path.join(test_data_dir,
                                 'data/dude/xiap/receptor_rdkit.pdb')

    # Keep only the entries titled '312335'; start with hydrogens removed.
    all_mols = list(oddt.toolkit.readfile('sdf', ligands_path))
    mols = [mol for mol in all_mols if mol.title == '312335']
    for mol in mols:
        mol.removeh()

    receptor = next(oddt.toolkit.readfile('pdb', receptor_path))
    receptor.protein = True
    receptor.removeh()

    # Stored fingerprint for the first kept entry (860 values).
    expected = [
        80, 119, 120, 120, 120, 120, 137, 138, 155, 155, 155, 155,
        155, 155, 155, 161, 199, 214, 214, 214, 226, 226, 233, 266,
        282, 283, 283, 313, 313, 386, 386, 430, 431, 431, 432, 448,
        581, 581, 643, 662, 684, 690, 729, 737, 741, 778, 778, 795,
        799, 799, 812, 812, 876, 877, 894, 907, 924, 924, 925, 925,
        935, 935, 935, 935, 935, 964, 964, 964, 993, 993, 996, 996,
        1002, 1002, 1042, 1042, 1066, 1066, 1077, 1113, 1119, 1224,
        1266, 1266, 1290, 1322, 1322, 1334, 1334, 1403, 1411, 1411,
        1461, 1475, 1480, 1497, 1521, 1584, 1584, 1614, 1618, 1618,
        1618, 1618, 1691, 1694, 1694, 1755, 1755, 1755, 1755, 1786,
        1835, 1835, 1867, 1953, 1953, 1953, 1953, 1963, 1970, 1970,
        1990, 1992, 1992, 1992, 2024, 2024, 2060, 2252, 2373, 2383,
        2383, 2390, 2390, 2451, 2537, 2538, 2552, 2555, 2558, 2640,
        2720, 2752, 2791, 2821, 2821, 2931, 2950, 2957, 2957, 2959,
        2961, 2961, 2961, 2963, 2970, 2970, 2982, 3034, 3049, 3066,
        3084, 3084, 3084, 3104, 3126, 3227, 3248, 3293, 3293, 3293,
        3420, 3439, 3517, 3539, 3546, 3546, 3546, 3546, 3553, 3559,
        3596, 3630, 3643, 3643, 3674, 3707, 3708, 3716, 3738, 3742,
        3828, 3846, 3859, 3876, 3887, 3904, 3904, 3904, 3916, 3916,
        3939, 3941, 3981, 3981, 3991, 3993, 4010, 4097, 4127, 4127,
        4127, 4127, 4165, 4181, 4192, 4316, 4330, 4372, 4391, 4461,
        4462, 4463, 4542, 4542, 4542, 4549, 4549, 4549, 4549, 4614,
        4615, 4657, 4668, 4670, 4686, 4686, 4686, 4688, 4688, 4688,
        4688, 4695, 4729, 4740, 4741, 4744, 4744, 4744, 4744, 4756,
        4814, 4828, 4828, 4861, 4861, 4861, 4861, 4861, 4861, 4861,
        4861, 4861, 4861, 4861, 4861, 4861, 4861, 4861, 4861, 4916,
        4945, 4945, 5011, 5037, 5042, 5044, 5046, 5055, 5078, 5080,
        5101, 5101, 5126, 5139, 5146, 5189, 5193, 5232, 5271, 5314,
        5321, 5350, 5379, 5439, 5439, 5439, 5439, 5481, 5482, 5535,
        5563, 5565, 5565, 5585, 5601, 5601, 5626, 5626, 5631, 5631,
        5631, 5631, 5631, 5631, 5639, 5670, 5688, 5690, 5742, 5804,
        5804, 5864, 5871, 5885, 5983, 5992, 6010, 6010, 6010, 6059,
        6059, 6096, 6164, 6183, 6183, 6197, 6234, 6256, 6261, 6261,
        6277, 6277, 6277, 6277, 6299, 6333, 6333, 6388, 6388, 6404,
        6428, 6428, 6428, 6428, 6431, 6431, 6445, 6449, 6450, 6480,
        6496, 6519, 6519, 6540, 6582, 6642, 6654, 6654, 6671, 6717,
        6722, 6735, 6735, 6735, 6764, 6764, 6781, 6781, 6781, 6781,
        6788, 6788, 6803, 6808, 6833, 6838, 6838, 6950, 6979, 6979,
        6997, 7069, 7115, 7194, 7250, 7254, 7277, 7288, 7352, 7464,
        7493, 7506, 7506, 7520, 7530, 7530, 7530, 7542, 7546, 7561,
        7608, 7678, 7678, 7685, 7701, 7701, 7701, 7752, 7752, 7752,
        7790, 7847, 7957, 7957, 7957, 7959, 8003, 8003, 8003, 8010,
        8083, 8086, 8086, 8086, 8086, 8113, 8116, 8160, 8190, 8230,
        8230, 8262, 8262, 8282, 8284, 8284, 8292, 8297, 8327, 8327,
        8383, 8383, 8383, 8418, 8418, 8426, 8457, 8484, 8484, 8543,
        8543, 8580, 8629, 8651, 8655, 8697, 8726, 8781, 8784, 8796,
        8837, 8850, 8923, 9034, 9040, 9077, 9077, 9099, 9134, 9180,
        9206, 9257, 9281, 9304, 9304, 9333, 9341, 9358, 9393, 9394,
        9432, 9450, 9450, 9455, 9455, 9481, 9493, 9493, 9505, 9537,
        9547, 9572, 9585, 9610, 9610, 9661, 9689, 9690, 9690, 9700,
        9700, 9733, 9736, 9736, 9736, 9736, 9765, 9784, 9885, 9885,
        9885, 9934, 9938, 9968, 9968, 10037, 10080, 10080, 10103,
        10113, 10113, 10114, 10115, 10115, 10115, 10139, 10139, 10139,
        10139, 10139, 10181, 10181, 10181, 10181, 10185, 10286, 10295,
        10317, 10317, 10340, 10340, 10340, 10340, 10352, 10353, 10364,
        10364, 10385, 10490, 10490, 10504, 10535, 10539, 10539, 10589,
        10589, 10591, 10599, 10648, 10648, 10650, 10650, 10681, 10703,
        10714, 10714, 10714, 10739, 10739, 10793, 10806, 10806, 10806,
        10837, 10865, 10865, 10871, 10903, 10978, 10978, 11056, 11056,
        11141, 11159, 11207, 11213, 11257, 11272, 11360, 11362, 11377,
        11454, 11454, 11458, 11458, 11458, 11539, 11563, 11580, 11580,
        11580, 11605, 11605, 11610, 11610, 11613, 11624, 11664, 11664,
        11683, 11683, 11697, 11698, 11701, 11707, 11753, 11835, 11846,
        11852, 11858, 11876, 11879, 11890, 11957, 11957, 12009, 12115,
        12130, 12151, 12222, 12268, 12290, 12290, 12295, 12295, 12320,
        12431, 12448, 12475, 12475, 12475, 12481, 12485, 12487, 12587,
        12632, 12632, 12634, 12641, 12641, 12641, 12664, 12761, 12761,
        12778, 12832, 12878, 12878, 12884, 12958, 12982, 12982, 12982,
        12982, 12992, 13057, 13079, 13121, 13129, 13200, 13200, 13277,
        13277, 13317, 13317, 13320, 13320, 13336, 13388, 13434, 13443,
        13475, 13495, 13517, 13517, 13553, 13602, 13637, 13655, 13658,
        13658, 13688, 13688, 13774, 13774, 13784, 13784, 13784, 13786,
        13791, 13791, 13809, 13839, 13839, 13839, 13839, 13839, 13876,
        13905, 13906, 13906, 13906, 13906, 13920, 13920, 13920, 13920,
        13920, 13949, 13949, 14058, 14122, 14122, 14133, 14133, 14198,
        14259, 14259, 14317, 14332, 14368, 14386, 14423, 14423, 14423,
        14423, 14423, 14423, 14423, 14439, 14440, 14447, 14464, 14464,
        14469, 14505, 14510, 14510, 14513, 14516, 14516, 14529, 14529,
        14529, 14549, 14563, 14563, 14570, 14570, 14570, 14582, 14605,
        14605, 14611, 14748, 14748, 14750, 14757, 14772, 14798, 14802,
        14810, 14854, 14857, 14857, 14878, 14878, 14903, 14903, 14993,
        14993, 14996, 15008, 15012, 15018, 15044, 15044, 15074, 15092,
        15092, 15146, 15146, 15191, 15251, 15251, 15253, 15258, 15311,
        15311, 15317, 15429, 15429, 15441, 15444, 15498, 15518, 15520,
        15622, 15622, 15622, 15651, 15672, 15712, 15715, 15798, 15798,
        15811, 15950, 15982, 15982, 15987, 16023, 16023, 16042, 16049,
        16054, 16080, 16099, 16119, 16119, 16119, 16174, 16174, 16213,
        16225, 16229, 16234, 16234, 16234, 16252, 16252, 16252, 16252,
        16252, 16320, 16328, 16362, 16362,
    ]

    # Hydrogen-free complex.
    fingerprint = PLEC(mols[0], receptor)
    assert_array_equal(expected, fingerprint)
    assert_array_equal(fingerprint.shape, (860,))

    # Hydrogens should not impact the PLEC fingerprint
    for mol in mols:
        mol.addh(only_polar=True)
    receptor.addh(only_polar=True)
    fingerprint = PLEC(mols[0], receptor)
    assert_array_equal(expected, fingerprint, "Polar Hs break PLEC")

    for mol in mols:
        mol.addh()
    receptor.addh()
    fingerprint = PLEC(mols[0], receptor)
    assert_array_equal(expected, fingerprint, "Non-polar Hs break PLEC")
|
|
554 |
|
|
|
555 |
|
|
|
556 |
def test_plec_binded_hoh():
    """PLEC length with and without waters for the 3kwa pocket.

    If water coordinates metal in PDB and ligand is in contact with it, HOH
    will pop up in metals environment, thus we cannot ignore HOHs in
    repr_dict.

    The check only runs on the 'ob' backend, or on 'rdk' when the toolkit
    version string compares >= '2017.03'. Any other configuration is
    reported as skipped rather than passing without running an assertion.
    """
    supported = (oddt.toolkit.backend == 'ob' or
                 (oddt.toolkit.backend == 'rdk' and
                  oddt.toolkit.__version__ >= '2017.03'))
    if not supported:
        pytest.skip("Requires the 'ob' backend or 'rdk' >= 2017.03")

    # Local fixtures; these intentionally shadow the module-level
    # `ligand` / `protein` inside this test only.
    ligand = next(oddt.toolkit.readfile('sdf', os.path.join(
        test_data_dir, 'data', 'pdb', '3kwa_ligand.sdf')))
    protein = next(oddt.toolkit.readfile('pdb', os.path.join(
        test_data_dir, 'data', 'pdb', '3kwa_5Apocket.pdb')))
    protein.protein = True

    assert len(PLEC(ligand, protein, ignore_hoh=True)) == 465
    assert len(PLEC(ligand, protein, ignore_hoh=False)) == 560
|
|
571 |
|
|
|
572 |
|
|
|
573 |
def test_plec_similarity():
    """Dice similarity of PLEC must agree for sparse and dense output."""
    ligands_path = os.path.join(test_data_dir,
                                'data/dude/xiap/actives_docked.sdf')
    receptor_path = os.path.join(test_data_dir,
                                 'data/dude/xiap/receptor_rdkit.pdb')

    # Keep only the entries titled '312335' and prepare them with addh.
    all_mols = list(oddt.toolkit.readfile('sdf', ligands_path))
    mols = [mol for mol in all_mols if mol.title == '312335']
    for mol in mols:
        mol.addh(only_polar=True)

    receptor = next(oddt.toolkit.readfile('pdb', receptor_path))
    receptor.protein = True
    receptor.addh(only_polar=True)

    # Sparse fingerprints: every other entry against the first one.
    reference_sparse = PLEC(mols[0], receptor)
    outcome_sparse = []
    for mol in mols[1:]:
        outcome_sparse.append(
            dice(reference_sparse, PLEC(mol, receptor), sparse=True))

    target_outcome = np.array([0.833, 0.729, 0.849, 0.785, 0.821,
                               0.604, 0.868, 0.656, 0.712, 0.652,
                               0.699, 0.785, 0.736, 0.745, 0.661,
                               0.667, 0.555, 0.616, 0.714])

    # Dense fingerprints: the same comparison with sparse=False throughout.
    reference_dense = PLEC(mols[0], receptor, sparse=False)
    outcome_dense = []
    for mol in mols[1:]:
        outcome_dense.append(
            dice(reference_dense, PLEC(mol, receptor, sparse=False),
                 sparse=False))

    assert_array_almost_equal(outcome_sparse, target_outcome, decimal=2)
    assert_array_almost_equal(outcome_dense, target_outcome, decimal=2)
|
|
595 |
|
|
|
596 |
|
|
|
597 |
def test_molecular_shingles():
    """Sorted molecular shingles must be stable when the molecule is shuffled.

    The expected shingle strings differ between the 'ob' backend and any
    other backend, so a separate list is stored for each case.
    """
    sildenafil = oddt.toolkit.readstring(
        "smi",
        "CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12")

    ob_shingles = [
        'CCC', 'CCCc', 'CCCc(c)n', 'CCN(C)CC', 'CCN(CC)S(=O)(=O)c', 'CCO',
        'CCOc', 'CCOc(c)c', 'CCc1nncc1n', 'CN(C)C',
        'CN(C)S(=O)(=O)c(c)c', 'CN(S)CCN', 'CN(S)CCN', 'COc(cc)c(c)c',
        'Cc1ccn(n1)C', 'Cn(c)n', 'Cn1ncc(c1c(=O)[nH])n',
        'Cn1nccc1c', 'NCCN(C)C', 'NCCN(C)C', 'cS(=O)(=O)N', 'cS(=O)(=O)N',
        'c[nH]c(=O)c(c)n', 'cc(=O)[nH]',
        'cc([nH])nc(c)c', 'cc(c)cc(S)c', 'cc(n)[nH]c(=O)c',
        'ccc(c(O)c)c(n)[nH]', 'ccc(cc)S(=O)(=O)N', 'cccc(O)c',
        'cccc(S)c', 'cnc([nH]c)c(c)c', 'cnc1c(C)nnc1c',
    ]
    other_shingles = [
        'CCC', 'CCN(C)CC', 'CCO', 'CCc1nncc1n', 'CN(C)C', 'CN(C)CCN',
        'CN(C)CCN', 'CN(S)CCN', 'CN(S)CCN', 'Cc1ccn(C)n1',
        'Cn1ncc(n)c1c([nH])=O', 'c-c([nH])nc(c)c', 'c-c(c)c(cc)OC',
        'c-c(c)cc(c)S', 'c-c(n)[nH]c(c)=O', 'cCCC', 'cOCC',
        'cS(=O)(=O)N(CC)CC', 'cS(N)(=O)=O', 'cS(N)(=O)=O',
        'c[nH]c(=O)c(c)n', 'cc([nH])=O', 'cc(c)OCC',
        'cc(c)S(=O)(=O)N(C)C', 'cc(n)CCC', 'cc1ccnn1C',
        'ccc(-c(n)[nH])c(c)O', 'ccc(cc)S(N)(=O)=O', 'cccc(c)O',
        'cccc(c)S', 'cn(C)n', 'cnc([nH]c)-c(c)c', 'cnc1c(C)nnc1c',
    ]
    using_openbabel = oddt.toolkit.backend == 'ob'
    target_shingles = ob_shingles if using_openbabel else other_shingles

    for _ in range(10):
        # Each round shuffles the result of the previous round.
        sildenafil = shuffle_mol(sildenafil)
        found = get_molecular_shingles(sildenafil)
        assert_array_equal(sorted(found), target_shingles)