|
a |
|
b/tests/test_toolkit.py |
|
|
1 |
import os |
|
|
2 |
from collections import OrderedDict, deque |
|
|
3 |
|
|
|
4 |
from six.moves.cPickle import loads, dumps |
|
|
5 |
import numpy as np |
|
|
6 |
import pandas as pd |
|
|
7 |
|
|
|
8 |
import pytest |
|
|
9 |
from numpy.testing import assert_array_equal, assert_array_almost_equal |
|
|
10 |
|
|
|
11 |
import oddt |
|
|
12 |
from oddt.spatial import rmsd |
|
|
13 |
from oddt.toolkits.common import canonize_ring_path |
|
|
14 |
|
|
|
15 |
# Directory that contains this test module; fixture paths are built from it.
test_data_dir = os.path.dirname(os.path.abspath(__file__))
# Receptor structure used by the protein-related tests.
xiap_receptor = os.path.join(
    test_data_dir, 'data', 'dude', 'xiap', 'receptor_rdkit.pdb')
# Multi-molecule SDF used by the small-molecule tests.
xiap_actives = os.path.join(
    test_data_dir, 'data', 'dude', 'xiap', 'actives_docked.sdf')
|
|
20 |
|
|
|
21 |
|
|
|
22 |
def test_mol():
    """Check atom/residue counts while hydrogens are added and removed.

    A molecule read from SMILES and the XIAP receptor are taken through
    ``addh()``, ``addh(only_polar=True)`` and ``removeh()``; the number of
    atoms (and, for the receptor, atoms per residue) is checked at each step.
    """
    # --- small molecule ---------------------------------------------------
    ligand = oddt.toolkit.readstring('smi', 'c1ccccc1O')
    assert len(ligand.atoms) == 7
    ligand.addh()
    assert len(ligand.atoms) == 13
    ligand.removeh()
    ligand.addh(only_polar=True)
    assert len(ligand.atoms) == 8
    ligand.removeh()
    assert len(ligand.atoms) == 7

    # --- protein ----------------------------------------------------------
    receptor = next(oddt.toolkit.readfile('pdb', xiap_receptor))
    receptor.protein = True

    # Expected atoms per residue: no hydrogens / all hydrogens / polar only.
    heavy_counts = [
        6, 10, 8, 8, 7, 11, 8, 7, 6, 8, 5, 8, 12, 9, 5, 11, 8,
        11, 7, 11, 4, 7, 14, 8, 12, 6, 7, 8, 9, 9, 9, 8, 5, 11,
        5, 4, 11, 12, 5, 8, 4, 9, 4, 8, 9, 7, 9, 6, 11, 10, 6,
        4, 4, 4, 8, 7, 8, 14, 9, 7, 6, 9, 8, 7, 14, 9, 9, 10, 5,
        9, 14, 12, 7, 4, 6, 9, 12, 8, 8, 9, 9, 9, 4, 9, 9, 12,
        8, 8, 8, 8, 10, 8, 7, 10, 11, 12, 6, 7, 8, 11, 8, 9, 4,
        8, 9, 7, 9, 6, 6, 4, 4, 4, 8, 7, 8, 14, 9, 7, 6, 9, 8,
        7, 14, 9, 9, 10, 5, 9, 14, 12, 7, 4, 8, 10, 8, 7, 1, 1]
    all_h_counts = [
        12, 17, 17, 19, 14, 23, 14, 14, 11, 17, 10, 13, 21,
        16, 10, 23, 19, 20, 14, 20, 7, 14, 24, 19, 21, 11,
        16, 14, 21, 16, 17, 19, 10, 23, 10, 7, 20, 21, 10,
        19, 7, 16, 7, 13, 21, 16, 21, 10, 20, 17, 10, 7, 7,
        7, 19, 14, 13, 24, 21, 14, 11, 16, 13, 14, 24, 16,
        17, 16, 10, 21, 24, 21, 14, 7, 10, 21, 21, 19, 19,
        16, 17, 21, 7, 17, 16, 21, 19, 14, 14, 19, 17, 19,
        14, 18, 25, 22, 11, 17, 21, 22, 21, 17, 7, 13, 21,
        16, 21, 11, 11, 7, 7, 7, 19, 14, 13, 24, 21, 14,
        11, 16, 13, 14, 24, 16, 17, 16, 10, 21, 24, 21, 14,
        8, 20, 17, 19, 15, 1, 1]
    polar_h_counts = [
        9, 12, 9, 9, 7, 16, 11, 7, 8, 9, 6, 10, 14, 11,
        6, 16, 9, 12, 9, 12, 5, 9, 16, 9, 14, 8, 8, 11,
        12, 11, 12, 9, 6, 16, 6, 5, 12, 14, 6, 9, 5, 11,
        5, 10, 12, 8, 12, 7, 12, 12, 7, 5, 5, 5, 9, 9,
        10, 16, 12, 7, 8, 11, 10, 7, 16, 11, 12, 11, 6,
        12, 16, 14, 7, 5, 7, 12, 14, 9, 9, 11, 12, 12, 5,
        12, 11, 14, 9, 11, 11, 9, 12, 9, 9, 12, 17, 15,
        8, 8, 10, 13, 10, 12, 5, 10, 12, 8, 12, 7, 8, 5,
        5, 5, 9, 9, 10, 16, 12, 7, 8, 11, 10, 7, 16, 11,
        12, 11, 6, 12, 16, 14, 7, 5, 10, 12, 9, 9, 1, 1]

    def check_receptor(n_atoms, per_residue):
        # The residue count is expected to stay at 138 in every state.
        assert len(receptor.atoms) == n_atoms
        assert len(receptor.residues) == 138
        observed = [len(residue.atoms) for residue in receptor.residues]
        assert_array_equal(observed, per_residue)

    check_receptor(1114, heavy_counts)

    receptor.addh()
    check_receptor(2170, all_h_counts)

    receptor.removeh()
    receptor.addh(only_polar=True)
    check_receptor(1356, polar_h_counts)

    receptor.removeh()
    check_receptor(1114, heavy_counts)
|
|
91 |
|
|
|
92 |
|
|
|
93 |
def test_mol_calccharges():
    """Exercise ``calccharges`` on a small molecule and on the receptor.

    An unknown model name must raise ``ValueError``; 'gasteiger' and
    'mmff94' must leave at least one non-zero charge on the molecule; and
    'mmff94' on the XIAP receptor is expected to raise.
    """
    ligand = oddt.toolkit.readstring('smi', 'c1ccccc1O')
    ligand.addh()

    with pytest.raises(ValueError):
        ligand.calccharges('mmff94aaaaaa')

    for model in ('gasteiger', 'mmff94'):
        ligand.calccharges(model)
        has_nonzero = np.any(np.array(ligand.charges) != 0.)
        assert has_nonzero

    receptor = next(oddt.toolkit.readfile('pdb', xiap_receptor))
    receptor.protein = True

    # mmff94 charges cannot be generated for this protein
    with pytest.raises(Exception):
        receptor.calccharges('mmff94')
|
|
110 |
|
|
|
111 |
|
|
|
112 |
def test_toolkit_hoh():
    """Residue count must stay at 4 (GLY + three HOH) when hydrogens are added."""
    # NOTE(review): PDB is a fixed-column format; the spacing inside this
    # literal looks collapsed in the reviewed copy -- confirm against the
    # repository version before applying.
    pdb_block = """ATOM 1 C1 GLY 1 0.000 0.000 0.000 1.00 0.00 C
ATOM 2 C2 GLY 1 0.000 0.000 0.000 1.00 0.00 C
ATOM 3 O1 GLY 1 0.000 0.000 0.000 1.00 0.00 O
ATOM 4 O2 GLY 1 0.000 0.000 0.000 1.00 0.00 O
ATOM 5 N1 GLY 1 0.000 0.000 0.000 1.00 0.00 N
ATOM 6 O3 HOH 2 0.000 0.000 0.000 1.00 0.00 O
ATOM 7 O4 HOH 3 0.000 0.000 0.000 1.00 0.00 O
ATOM 8 O5 HOH 4 0.000 0.000 0.000 1.00 0.00 O
"""
    structure = oddt.toolkit.readstring('pdb', pdb_block)
    structure.protein = True
    assert len(structure.residues) == 4

    # First add polar hydrogens only, then all hydrogens on top of that.
    for addh_kwargs in ({'only_polar': True}, {}):
        structure.addh(**addh_kwargs)
        assert len(structure.residues) == 4
|
|
132 |
|
|
|
133 |
|
|
|
134 |
def test_pickle():
    """Round-trip molecules through pickle and compare with the originals.

    Covers eagerly read molecules, the cached ``_atom_dict`` array and
    molecules read with ``lazy=True``.
    """
    def roundtrip(items):
        # dumps/loads every molecule independently
        return [loads(dumps(item)) for item in items]

    def check_same_content(originals, copies):
        assert_array_equal([m.title for m in originals],
                           [m.title for m in copies])
        assert_array_equal([m.smiles for m in originals],
                           [m.smiles for m in copies])
        for original, clone in zip(originals, copies):
            assert dict(original.data) == dict(clone.data)

    mols = list(oddt.toolkit.readfile('sdf', xiap_actives))
    check_same_content(mols, roundtrip(mols))

    # atom_dict cache: empty before first access, filled afterwards, and
    # still present on the unpickled copies
    all_true = [True] * len(mols)
    assert_array_equal([m._atom_dict is None for m in mols], all_true)
    mols_atom_dict = np.hstack([m.atom_dict for m in mols])
    assert_array_equal([m._atom_dict is not None for m in mols], all_true)
    pickled_mols = roundtrip(mols)
    assert_array_equal([m._atom_dict is not None for m in pickled_mols],
                       all_true)
    pickled_mols_atom_dict = np.hstack([m._atom_dict for m in pickled_mols])
    for name in mols[0].atom_dict.dtype.names:
        field_type = np.dtype(mols_atom_dict[name].dtype).type
        # numeric fields are compared approximately, everything else exactly
        if issubclass(field_type, np.number):
            compare = assert_array_almost_equal
        else:
            compare = assert_array_equal
        compare(mols_atom_dict[name], pickled_mols_atom_dict[name])

    # Lazy molecules
    mols = list(oddt.toolkit.readfile('sdf', xiap_actives, lazy=True))
    pickled_mols = roundtrip(mols)

    assert_array_equal([m._source is not None for m in pickled_mols],
                       [True] * len(mols))
    check_same_content(mols, pickled_mols)
|
|
181 |
|
|
|
182 |
|
|
|
183 |
def test_diverse_conformers():
    """Generate diverse conformers and check their number and RMSD values.

    With default settings 10 conformers are expected; for the 'ob' backend
    their RMSDs to the input molecule are compared with stored values. Every
    method listed for the active backend must return exactly ``n_conf``
    conformers.
    """
    # FIXME: make toolkit a module so we can import from it
    generate = oddt.toolkit.diverse_conformers_generator

    smiles = 'CN1CCN(S(=O)(C2=CC=C(OCC)C(C3=NC4=C(N(C)N=C4CCC)C(N3)=O)=C2)=O)CC1'
    mol = oddt.toolkit.readstring('smi', smiles)
    mol.make3D()

    res = [rmsd(mol, conf) for conf in generate(mol, seed=123456)]
    assert len(res) == 10

    backend_is_ob = oddt.toolkit.backend == 'ob'
    if backend_is_ob:
        # NOTE(review): this compares version strings lexicographically --
        # confirm that is intended for all supported toolkit versions.
        if oddt.toolkit.__version__ < '0.3':
            expected = [0., 3.043712, 3.897143, 3.289482,
                        3.066374, 2.909683, 2.913927,
                        3.488244, 3.70603, 3.597467]
        else:
            expected = [0.0, 1.372770, 2.489789, 2.759941,
                        2.968366, 3.228773, 3.392191,
                        3.921166, 3.185065, 3.283915]
        assert_array_almost_equal(res, expected)
    # Reference values for the other backend are currently disabled:
    #   __version__ > '2016.03.9':
    #       [1.237538, 2.346984, 0.900624, 3.469511, 1.886213, 2.128909,
    #        2.852608, 1.312513, 1.291595, 1.326843]
    #   otherwise:
    #       [3.08995, 2.846358, 3.021795, 1.720319, 2.741972, 2.965332,
    #        2.925344, 2.930157, 2.934049, 3.009545]

    # check all implemented methods
    methods = ['ga', 'confab'] if backend_is_ob else ['dg', 'etkdg', 'kdg', 'etdg']
    for method in methods:
        for n_conf in (5, 10, 20):
            conformers = generate(mol,
                                  seed=123456,
                                  n_conf=n_conf,
                                  method=method)
            assert len(conformers) == n_conf
|
|
237 |
|
|
|
238 |
|
|
|
239 |
def test_indices():
    """First atom must report idx0 == 0 and idx1 == 1; plain ``idx`` warns."""
    smiles = 'CCc1cc(C)c(C)cc1-c1ccc(-c2cccc(C)c2)cc1'
    first_atom = oddt.toolkit.readstring('smi', smiles).atoms[0]

    assert first_atom.idx0 == 0
    assert first_atom.idx1 == 1

    # the unmarked index is deprecated in ODDT, so reading it must warn
    with pytest.warns((DeprecationWarning, FutureWarning)):
        legacy_index = first_atom.idx
        assert legacy_index == 1
|
|
250 |
|
|
|
251 |
|
|
|
252 |
def test_pickle_protein():
    """Pickling must keep the ``protein`` flag and the cached atom_dict."""
    rec = next(oddt.toolkit.readfile('pdb', xiap_receptor))

    def check_roundtrip(expected_flag):
        # the unpickled copy carries the flag and an already built atom_dict
        clone = loads(dumps(rec))
        assert clone.protein is expected_flag
        assert clone._atom_dict is not None

    # accessing atom_dict builds and caches it
    assert rec.atom_dict is not None
    assert rec._atom_dict is not None
    check_roundtrip(False)

    rec.protein = True
    # setting the protein property should clear the atom_dict cache
    assert rec._atom_dict is None
    # rebuild the cache before pickling again
    assert rec.atom_dict is not None
    check_roundtrip(True)
|
|
273 |
|
|
|
274 |
|
|
|
275 |
# Only defined (and therefore only collected) when the 'rdk' backend is active.
if oddt.toolkit.backend == 'rdk':
    def test_badmol():
        """An unparsable SMILES must be returned as None."""
        assert oddt.toolkit.readstring('smi', 'c1cc2') is None
|
|
280 |
|
|
|
281 |
|
|
|
282 |
def _assert_atom_dict_columns(data, corr_data, columns, label, context):
    """Compare selected columns of a computed atom_dict frame with a reference.

    Numeric columns are compared approximately, all others exactly. Before
    asserting, every differing row is printed together with ``context(pos)``
    so that a failure can be traced back to a concrete atom.

    Parameters
    ----------
    data : pandas.DataFrame
        Values computed by the test.
    corr_data : pandas.DataFrame
        Reference values.
    columns : list of str
        Names of the columns to compare.
    label : str
        Prefix of the assertion message ('Mols' or 'Protein').
    context : callable
        Called with the integer row position of a mismatch; the returned
        value is printed next to the differing values.

    Raises
    ------
    AssertionError
        When a compared column does not match the reference.
    """
    for name in columns:
        computed, expected = data[name], corr_data[name]
        numeric = issubclass(np.dtype(computed.dtype).type, np.number)
        if numeric:
            # absolute difference, so deviations in both directions are shown
            mismatch = (computed - expected).abs() > 1e-6
        else:
            mismatch = computed != expected
        # flatnonzero gives plain integer positions (no 1-element arrays that
        # would have to be converted with int())
        for pos in np.flatnonzero(mismatch.values):
            print(pos, computed.iloc[pos], expected.iloc[pos], context(pos))

        err_msg = '%s atom_dict\'s collumn: "%s" is not equal' % (label, name)
        if numeric:
            assert_array_almost_equal(computed, expected, err_msg=err_msg)
        else:
            assert_array_equal(computed, expected, err_msg=err_msg)


def test_dicts():
    """Compare atom_dict arrays of ligands and receptor with stored CSV data.

    Polar hydrogens are added first; hydrogen atoms are then filtered out and
    the remaining rows are compared column by column with the reference
    files under ``data/results/xiap``.
    """
    mols = list(oddt.toolkit.readfile('sdf', xiap_actives))
    for mol in mols:
        mol.addh(only_polar=True)

    skip_cols = ['radius', 'charge', 'id',
                 # following fields need to be standardized
                 'hybridization',
                 'numhs',
                 'formalcharge',
                 ]
    all_cols = [name for name in mols[0].atom_dict.dtype.names
                if name not in ['coords', 'neighbors', 'neighbors_id']]
    common_cols = [name for name in all_cols if name not in skip_cols]

    # Small molecules
    all_dicts = np.hstack([mol.atom_dict for mol in mols])
    all_dicts = all_dicts[all_dicts['atomicnum'] != 1]

    data = pd.DataFrame({name: all_dicts[name] for name in all_cols})
    # index of the source molecule for every non-hydrogen atom row
    data['mol_idx'] = [i
                       for i, mol in enumerate(mols)
                       for atom in mol
                       if atom.atomicnum != 1]

    # Save correct results
    # data[common_cols].to_csv(
    #     os.path.join(test_data_dir, 'data/results/xiap/mols_atom_dict.csv'),
    #     index=False)

    corr_data = pd.read_csv(os.path.join(test_data_dir, 'data', 'results',
                                         'xiap', 'mols_atom_dict.csv')
                            ).fillna('')

    mol_idx = data['mol_idx']
    _assert_atom_dict_columns(
        data, corr_data, common_cols, 'Mols',
        lambda pos: mols[mol_idx.iloc[pos]].write('smi'))

    # Protein
    rec = next(oddt.toolkit.readfile('pdb', xiap_receptor))
    rec.protein = True
    rec.addh(only_polar=True)

    skip_cols = ['radius', 'charge', 'resid', 'id',
                 # following fields need to be standardized
                 'hybridization',
                 'numhs',
                 'formalcharge',
                 ]
    common_cols = [name for name in all_cols if name not in skip_cols]

    all_dicts = rec.atom_dict[rec.atom_dict['atomicnum'] != 1]

    prot_data = pd.DataFrame({name: all_dicts[name] for name in all_cols})

    # Save correct results
    # prot_data[common_cols].to_csv(
    #     os.path.join(test_data_dir, 'data/results/xiap/prot_atom_dict.csv'),
    #     index=False)

    prot_corr_data = pd.read_csv(os.path.join(test_data_dir, 'data', 'results',
                                              'xiap', 'prot_atom_dict.csv')
                                 ).fillna('')

    _assert_atom_dict_columns(
        prot_data, prot_corr_data, common_cols, 'Protein',
        lambda pos: (prot_data['atomtype'].iloc[pos],
                     prot_data['resname'].iloc[pos]))
|
|
387 |
|
|
|
388 |
|
|
|
389 |
def test_ss():
    """Check the 'isalpha' / 'isbeta' flags in ``res_dict`` for three structures.

    A helix-only file, a sheet-only file and the XIAP receptor are read as
    proteins and the flagged residue positions are compared with stored
    expectations.
    """
    def read_protein(path):
        structure = next(oddt.toolkit.readfile('pdb', path))
        structure.protein = True
        return structure

    pdb_dir = os.path.join(test_data_dir, 'data', 'pdb')

    # Alpha helix: the first 27 of 29 residues are flagged as helical
    helix = read_protein(os.path.join(pdb_dir, '1cos_helix.pdb'))
    expected_alpha = list(range(27))

    assert len(helix.res_dict) == 29
    assert_array_equal(np.where(helix.res_dict['isalpha'])[0], expected_alpha)
    assert helix.res_dict['isalpha'].sum() == 27
    assert helix.res_dict['isbeta'].sum() == 0

    # Beta sheet
    sheet = read_protein(os.path.join(pdb_dir, '1icl_sheet.pdb'))
    expected_beta = [2, 3, 4, 5, 10, 11, 12, 13]

    assert len(sheet.res_dict) == 29
    assert_array_equal(np.where(sheet.res_dict['isbeta'])[0], expected_beta)
    assert sheet.res_dict['isbeta'].sum() == 8
    assert sheet.res_dict['isalpha'].sum() == 0

    # Full protein
    receptor = read_protein(xiap_receptor)
    expected_alpha = [15, 16, 17, 18, 19, 20, 28, 29, 30, 31, 32, 33, 63, 64,
                      65, 66, 67, 68, 69, 70, 75, 76, 77, 78, 79, 80, 83, 84,
                      85, 86, 87, 88, 89, 90, 91, 121, 122, 123, 124, 125, 126,
                      127, 128]
    expected_beta = [36, 37, 38, 45, 46, 47, 52, 53, 54]

    assert_array_equal(np.where(receptor.res_dict['isalpha'])[0],
                       expected_alpha)
    assert_array_equal(np.where(receptor.res_dict['isbeta'])[0],
                       expected_beta)
    assert len(receptor.res_dict) == 136
    assert receptor.res_dict['isalpha'].sum() == 43
    assert receptor.res_dict['isbeta'].sum() == 9
    # a residue must never carry both flags
    both = receptor.res_dict['isalpha'] & receptor.res_dict['isbeta']
    assert both.sum() == 0  # Must be zero!
    neither = ~receptor.res_dict['isalpha'] & ~receptor.res_dict['isbeta']
    assert neither.sum() == 84
|
|
457 |
|
|
|
458 |
|
|
|
459 |
def test_pdbqt():
    """Write molecules as PDBQT, read them back and inspect the branch layout."""
    def reread(molecule, **write_kwargs):
        # serialise to PDBQT and parse the text again
        return oddt.toolkit.readstring('pdbqt',
                                       molecule.write('pdbqt', **write_kwargs))

    def nodes_size(block):
        """Return the ATOM count per ROOT/BRANCH section, in file order."""
        sizes = OrderedDict()
        section = None
        for row in block.split('\n'):
            if row.startswith('ROOT') or row.startswith('BRANCH'):
                section = row.strip()
                sizes[section] = 0
            elif row.startswith('ATOM'):
                sizes[section] += 1
        return list(sizes.values())

    mol = next(oddt.toolkit.readfile('sdf', xiap_actives))
    mol2 = reread(mol)
    assert mol.title == mol2.title

    # test loop breaks in DFS algorithm
    mol = oddt.toolkit.readstring('smi', 'CCc1cc(C)c(C)cc1-c1ccc(-c2cccc(C)c2)cc1')
    mol.make3D()

    # roundtrip molecule with template
    mol2 = reread(mol)
    mol.removeh()

    assert len(mol.atoms) == len(mol2.atoms)

    # check the branch order and size (differs between backends)
    if oddt.toolkit.backend == 'ob':
        expected_sizes = [6, 8, 2, 7]
    else:
        expected_sizes = [8, 6, 7, 2]
    assert_array_equal(nodes_size(mol.write('pdbqt')), expected_sizes)

    ligand_file = os.path.join(test_data_dir, 'data', 'dude', 'xiap',
                               'crystal_ligand.sdf')
    mol = next(oddt.toolkit.readfile('sdf', ligand_file))
    assert_array_equal(nodes_size(mol.write('pdbqt')),
                       [8, 3, 6, 6, 1, 6, 3, 2, 2])

    # roundtrip disconnected fragments, first with backend-specific options
    mol = oddt.toolkit.readstring('smi', 'c1ccccc1.c1ccccc1C')
    if oddt.toolkit.backend == 'ob':
        kwargs = {'opt': {'r': None}}
    else:
        kwargs = {'flexible': False}

    mol2 = reread(mol, **kwargs)
    assert len(mol.atoms) == len(mol2.atoms)

    # ... and then with default writer settings
    mol2 = reread(mol)
    assert len(mol.atoms) == len(mol2.atoms)
|
|
511 |
|
|
|
512 |
|
|
|
513 |
def test_residue_info():
    """Check residue count and the attributes of the first residue."""
    pocket_path = os.path.join(test_data_dir, 'data', 'pdb',
                               '3kwa_5Apocket.pdb')
    pocket = next(oddt.toolkit.readfile('pdb', pocket_path))
    assert len(pocket.residues) == 19

    first_residue = pocket.residues[0]
    # attribute name -> expected value, checked in this order
    expected = [('idx0', 0), ('number', 92), ('chain', 'A'), ('name', 'GLN')]
    for attribute, value in expected:
        assert getattr(first_residue, attribute) == value
|
|
524 |
|
|
|
525 |
|
|
|
526 |
def test_canonize_ring_path():
    """Rotated and reversed ring paths must canonize to the same list.

    A tuple argument is expected to be rejected with ``ValueError``.
    """
    canonical = list(range(6))
    ring = deque(canonical)
    ring.rotate(3)

    # rotated ring
    assert canonize_ring_path(ring) == canonical
    # rotated and reversed ring
    ring.reverse()
    assert canonize_ring_path(ring) == canonical

    with pytest.raises(ValueError):
        canonize_ring_path(tuple(range(6)))