|
a |
|
b/tests/test_virtualscreening.py |
|
|
1 |
import os |
|
|
2 |
from tempfile import mkdtemp, NamedTemporaryFile |
|
|
3 |
|
|
|
4 |
import pytest |
|
|
5 |
from numpy.testing import assert_array_equal, assert_array_almost_equal |
|
|
6 |
|
|
|
7 |
import pandas as pd |
|
|
8 |
|
|
|
9 |
import oddt |
|
|
10 |
from oddt.utils import method_caller |
|
|
11 |
from oddt.spatial import rmsd |
|
|
12 |
from oddt.scoring import scorer |
|
|
13 |
from oddt.scoring.functions import rfscore, nnscore |
|
|
14 |
from oddt.virtualscreening import virtualscreening |
|
|
15 |
|
|
|
16 |
test_data_dir = os.path.dirname(os.path.abspath(__file__)) |
|
|
17 |
|
|
|
18 |
# common file names |
|
|
19 |
dude_data_dir = os.path.join(test_data_dir, 'data', 'dude', 'xiap') |
|
|
20 |
xiap_crystal_ligand = os.path.join(dude_data_dir, 'crystal_ligand.sdf') |
|
|
21 |
xiap_protein = os.path.join(dude_data_dir, 'receptor_rdkit.pdb') |
|
|
22 |
xiap_actives_docked = os.path.join(dude_data_dir, 'actives_docked.sdf') |
|
|
23 |
|
|
|
24 |
|
|
|
25 |
def test_vs_scoring_vina(): |
|
|
26 |
"""VS scoring (Vina) tests""" |
|
|
27 |
vs = virtualscreening(n_cpu=1) |
|
|
28 |
vs.load_ligands('sdf', xiap_crystal_ligand) |
|
|
29 |
vs.score(function='autodock_vina', protein=xiap_protein) |
|
|
30 |
mols = list(vs.fetch()) |
|
|
31 |
assert len(mols) == 1 |
|
|
32 |
mol_data = mols[0].data |
|
|
33 |
assert 'vina_affinity' in mol_data |
|
|
34 |
assert 'vina_gauss1' in mol_data |
|
|
35 |
assert 'vina_gauss2' in mol_data |
|
|
36 |
assert 'vina_hydrogen' in mol_data |
|
|
37 |
assert 'vina_hydrophobic' in mol_data |
|
|
38 |
assert 'vina_repulsion' in mol_data |
|
|
39 |
assert mol_data['vina_affinity'] == '-3.57594' |
|
|
40 |
assert mol_data['vina_gauss1'] == '63.01213' |
|
|
41 |
assert mol_data['vina_gauss2'] == '999.07625' |
|
|
42 |
assert mol_data['vina_hydrogen'] == '0.0' |
|
|
43 |
assert mol_data['vina_hydrophobic'] == '26.12648' |
|
|
44 |
assert mol_data['vina_repulsion'] == '3.63178' |
|
|
45 |
|
|
|
46 |
|
|
|
47 |
def test_vs_docking(): |
|
|
48 |
"""VS docking (Vina) tests""" |
|
|
49 |
vs = virtualscreening(n_cpu=1) |
|
|
50 |
vs.load_ligands('sdf', xiap_crystal_ligand) |
|
|
51 |
|
|
|
52 |
# bad docking engine |
|
|
53 |
with pytest.raises(ValueError): |
|
|
54 |
vs.dock('srina', 'prot.pdb') |
|
|
55 |
|
|
|
56 |
vs.dock(engine='autodock_vina', |
|
|
57 |
protein=xiap_protein, |
|
|
58 |
auto_ligand=xiap_crystal_ligand, |
|
|
59 |
exhaustiveness=1, |
|
|
60 |
energy_range=6, |
|
|
61 |
num_modes=7, |
|
|
62 |
size=(20, 20, 20), |
|
|
63 |
seed=0) |
|
|
64 |
mols = list(vs.fetch()) |
|
|
65 |
assert len(mols) == 7 |
|
|
66 |
mol_data = mols[0].data |
|
|
67 |
assert 'vina_affinity' in mol_data |
|
|
68 |
assert 'vina_rmsd_lb' in mol_data |
|
|
69 |
assert 'vina_rmsd_ub' in mol_data |
|
|
70 |
vina_scores = [-6.3, -6.0, -5.8, -5.8, -3.9, -3.0, -1.1] |
|
|
71 |
assert_array_equal([float(m.data['vina_affinity']) for m in mols], vina_scores) |
|
|
72 |
|
|
|
73 |
# verify the SMILES of molecules |
|
|
74 |
ref_mol = next(oddt.toolkit.readfile('sdf', xiap_crystal_ligand)) |
|
|
75 |
|
|
|
76 |
if oddt.toolkit.backend == 'ob': |
|
|
77 |
# OB 2.3.2 will fail the following, since Hs are removed, etc. |
|
|
78 |
# OB 2.4 recognizes the smiles chirality wrong |
|
|
79 |
pass |
|
|
80 |
else: |
|
|
81 |
vina_rmsd = [8.153314, 5.32554, 8.514586, 8.510169, 9.060128, 8.995098, |
|
|
82 |
8.626776] |
|
|
83 |
assert_array_equal([mol.smiles for mol in mols], |
|
|
84 |
[ref_mol.smiles] * len(mols)) |
|
|
85 |
|
|
|
86 |
assert_array_almost_equal([rmsd(ref_mol, mol, method='min_symmetry') |
|
|
87 |
for mol in mols], vina_rmsd) |
|
|
88 |
|
|
|
89 |
|
|
|
90 |
def test_vs_empty(): |
|
|
91 |
vs = virtualscreening(n_cpu=1) |
|
|
92 |
with pytest.raises(StopIteration, match='no molecules loaded'): |
|
|
93 |
vs.fetch() |
|
|
94 |
|
|
|
95 |
|
|
|
96 |
def test_vs_docking_empty(): |
|
|
97 |
vs = virtualscreening(n_cpu=1) |
|
|
98 |
vs.load_ligands('smi', os.path.join(dude_data_dir, 'actives_rdkit.smi')) |
|
|
99 |
|
|
|
100 |
vs.dock(engine='autodock_vina', |
|
|
101 |
protein=xiap_protein, |
|
|
102 |
auto_ligand=xiap_crystal_ligand, |
|
|
103 |
exhaustiveness=1, |
|
|
104 |
energy_range=5, |
|
|
105 |
num_modes=9, |
|
|
106 |
size=(20, 20, 20), |
|
|
107 |
seed=0) |
|
|
108 |
|
|
|
109 |
with pytest.raises(ValueError, match='has no 3D coordinates'): |
|
|
110 |
next(vs.fetch()) |
|
|
111 |
|
|
|
112 |
|
|
|
113 |
def test_vs_multithreading_fallback(): |
|
|
114 |
vs = virtualscreening(n_cpu=8) |
|
|
115 |
vs.load_ligands('sdf', xiap_crystal_ligand) |
|
|
116 |
|
|
|
117 |
vs.score(function='autodock_vina', protein=xiap_protein) |
|
|
118 |
|
|
|
119 |
with pytest.warns(UserWarning, match='Falling back to sub-methods multithreading'): |
|
|
120 |
method_caller(vs, 'fetch') |
|
|
121 |
|
|
|
122 |
|
|
|
123 |
if oddt.toolkit.backend == 'ob': # RDKit rewrite needed |
|
|
124 |
def test_vs_filtering(): |
|
|
125 |
"""VS preset filtering tests""" |
|
|
126 |
vs = virtualscreening(n_cpu=1) |
|
|
127 |
|
|
|
128 |
vs.load_ligands('sdf', xiap_actives_docked) |
|
|
129 |
vs.apply_filter('ro5', soft_fail=1) |
|
|
130 |
assert len(list(vs.fetch())) == 49 |
|
|
131 |
|
|
|
132 |
vs.load_ligands('sdf', xiap_actives_docked) |
|
|
133 |
vs.apply_filter('ro3', soft_fail=2) |
|
|
134 |
assert len(list(vs.fetch())) == 9 |
|
|
135 |
|
|
|
136 |
|
|
|
137 |
def test_vs_pains(): |
|
|
138 |
"""VS PAINS filter tests""" |
|
|
139 |
vs = virtualscreening(n_cpu=1) |
|
|
140 |
# TODO: add some failing molecules |
|
|
141 |
vs.load_ligands('sdf', xiap_actives_docked) |
|
|
142 |
vs.apply_filter('pains', soft_fail=0) |
|
|
143 |
assert len(list(vs.fetch())) == 100 |
|
|
144 |
|
|
|
145 |
|
|
|
146 |
def test_vs_similarity(): |
|
|
147 |
"""VS similarity filter (USRs, IFPs) tests""" |
|
|
148 |
ref_mol = next(oddt.toolkit.readfile('sdf', xiap_crystal_ligand)) |
|
|
149 |
receptor = next(oddt.toolkit.readfile('pdb', xiap_protein)) |
|
|
150 |
|
|
|
151 |
# following toolkit differences is due to different Hs treatment |
|
|
152 |
vs = virtualscreening(n_cpu=1, chunksize=10) |
|
|
153 |
vs.load_ligands('sdf', xiap_actives_docked) |
|
|
154 |
vs.similarity('usr', cutoff=0.4, query=ref_mol) |
|
|
155 |
if oddt.toolkit.backend == 'ob': |
|
|
156 |
assert len(list(vs.fetch())) == 11 |
|
|
157 |
else: |
|
|
158 |
assert len(list(vs.fetch())) == 6 |
|
|
159 |
|
|
|
160 |
vs = virtualscreening(n_cpu=1) |
|
|
161 |
vs.load_ligands('sdf', xiap_actives_docked) |
|
|
162 |
vs.similarity('usr_cat', cutoff=0.3, query=ref_mol) |
|
|
163 |
if oddt.toolkit.backend == 'ob': |
|
|
164 |
assert len(list(vs.fetch())) == 16 |
|
|
165 |
else: |
|
|
166 |
assert len(list(vs.fetch())) == 11 |
|
|
167 |
|
|
|
168 |
vs = virtualscreening(n_cpu=1) |
|
|
169 |
vs.load_ligands('sdf', xiap_actives_docked) |
|
|
170 |
vs.similarity('electroshape', cutoff=0.45, query=ref_mol) |
|
|
171 |
if oddt.toolkit.backend == 'ob': |
|
|
172 |
assert len(list(vs.fetch())) == 55 |
|
|
173 |
else: |
|
|
174 |
assert len(list(vs.fetch())) == 95 |
|
|
175 |
|
|
|
176 |
vs = virtualscreening(n_cpu=1) |
|
|
177 |
vs.load_ligands('sdf', xiap_actives_docked) |
|
|
178 |
vs.similarity('ifp', cutoff=0.8, query=ref_mol, protein=receptor) |
|
|
179 |
assert len(list(vs.fetch())) == 33 |
|
|
180 |
|
|
|
181 |
vs = virtualscreening(n_cpu=1) |
|
|
182 |
vs.load_ligands('sdf', xiap_actives_docked) |
|
|
183 |
vs.similarity('sifp', cutoff=0.8, query=ref_mol, protein=receptor) |
|
|
184 |
assert len(list(vs.fetch())) == 33 |
|
|
185 |
|
|
|
186 |
# test wrong method error |
|
|
187 |
with pytest.raises(ValueError): |
|
|
188 |
vs.similarity('sift', query=ref_mol) |
|
|
189 |
|
|
|
190 |
|
|
|
191 |
def test_vs_scoring(): |
|
|
192 |
protein = next(oddt.toolkit.readfile('pdb', xiap_protein)) |
|
|
193 |
protein.protein = True |
|
|
194 |
|
|
|
195 |
data_dir = os.path.join(test_data_dir, 'data') |
|
|
196 |
home_dir = mkdtemp() |
|
|
197 |
pdbbind_versions = (2007, 2013, 2016) |
|
|
198 |
|
|
|
199 |
pdbbind_dir = os.path.join(data_dir, 'pdbbind') |
|
|
200 |
for pdbbind_v in pdbbind_versions: |
|
|
201 |
version_dir = os.path.join(data_dir, 'v%s' % pdbbind_v) |
|
|
202 |
if not os.path.isdir(version_dir): |
|
|
203 |
os.symlink(pdbbind_dir, version_dir) |
|
|
204 |
|
|
|
205 |
filenames = [] |
|
|
206 |
# train mocked SFs |
|
|
207 |
for model in [nnscore(n_jobs=1)] + [rfscore(version=v, n_jobs=1) |
|
|
208 |
for v in [1, 2, 3]]: |
|
|
209 |
model.gen_training_data(data_dir, pdbbind_versions=pdbbind_versions, |
|
|
210 |
home_dir=home_dir) |
|
|
211 |
filenames.append(model.train(home_dir=home_dir)) |
|
|
212 |
vs = virtualscreening(n_cpu=-1, chunksize=10) |
|
|
213 |
vs.load_ligands('sdf', xiap_actives_docked) |
|
|
214 |
# error if no protein is fed |
|
|
215 |
with pytest.raises(ValueError): |
|
|
216 |
vs.score('nnscore') |
|
|
217 |
# bad sf name |
|
|
218 |
with pytest.raises(ValueError): |
|
|
219 |
vs.score('bad_sf', protein=protein) |
|
|
220 |
vs.score('nnscore', protein=xiap_protein) |
|
|
221 |
vs.score('nnscore_pdbbind2016', protein=protein) |
|
|
222 |
vs.score('rfscore_v1', protein=protein) |
|
|
223 |
vs.score('rfscore_v1_pdbbind2016', protein=protein) |
|
|
224 |
vs.score('rfscore_v2', protein=protein) |
|
|
225 |
vs.score('rfscore_v3', protein=protein) |
|
|
226 |
vs.score('pleclinear', protein=protein) |
|
|
227 |
vs.score('pleclinear_p5_l1_s65536_pdbbind2016', protein=protein) |
|
|
228 |
# use pickle directly |
|
|
229 |
vs.score(filenames[0], protein=protein) |
|
|
230 |
# pass SF object directly |
|
|
231 |
vs.score(scorer.load(filenames[0]), protein=protein) |
|
|
232 |
# pass wrong object (sum is not an instance of scorer) |
|
|
233 |
with pytest.raises(ValueError): |
|
|
234 |
vs.score(sum, protein=protein) |
|
|
235 |
|
|
|
236 |
mols = list(vs.fetch()) |
|
|
237 |
|
|
|
238 |
assert len(mols) == 100 |
|
|
239 |
mol_data = mols[0].data |
|
|
240 |
assert 'nnscore' in mol_data |
|
|
241 |
assert 'rfscore_v1' in mol_data |
|
|
242 |
assert 'rfscore_v2' in mol_data |
|
|
243 |
assert 'rfscore_v3' in mol_data |
|
|
244 |
assert 'PLEClinear_p5_l1_s65536' in mol_data |
|
|
245 |
|
|
|
246 |
vs = virtualscreening(n_cpu=-1, chunksize=10) |
|
|
247 |
vs.load_ligands('sdf', xiap_actives_docked) |
|
|
248 |
vs.score('nnscore', protein=protein) |
|
|
249 |
vs.score('rfscore_v1', protein=protein) |
|
|
250 |
vs.score('rfscore_v2', protein=protein) |
|
|
251 |
vs.score('rfscore_v3', protein=protein) |
|
|
252 |
with NamedTemporaryFile('w', suffix='.sdf') as molfile: |
|
|
253 |
with NamedTemporaryFile('w', suffix='.csv') as csvfile: |
|
|
254 |
vs.write('sdf', molfile.name, csv_filename=csvfile.name) |
|
|
255 |
data = pd.read_csv(csvfile.name) |
|
|
256 |
assert 'nnscore' in data.columns |
|
|
257 |
assert 'rfscore_v1' in data.columns |
|
|
258 |
assert 'rfscore_v2' in data.columns |
|
|
259 |
assert 'rfscore_v3' in data.columns |
|
|
260 |
|
|
|
261 |
mols = list(oddt.toolkit.readfile('sdf', molfile.name)) |
|
|
262 |
assert len(mols) == 100 |
|
|
263 |
|
|
|
264 |
vs.write_csv(csvfile.name, fields=['nnscore', 'rfscore_v1', |
|
|
265 |
'rfscore_v2', 'rfscore_v3']) |
|
|
266 |
data = pd.read_csv(csvfile.name) |
|
|
267 |
assert len(data.columns) == 4 |
|
|
268 |
assert len(data) == len(mols) |
|
|
269 |
assert 'nnscore' in data.columns |
|
|
270 |
assert 'rfscore_v1' in data.columns |
|
|
271 |
assert 'rfscore_v2' in data.columns |
|
|
272 |
assert 'rfscore_v3' in data.columns |
|
|
273 |
|
|
|
274 |
# remove files |
|
|
275 |
for f in filenames: |
|
|
276 |
os.unlink(f) |
|
|
277 |
|
|
|
278 |
# remove symlinks |
|
|
279 |
for pdbbind_v in pdbbind_versions: |
|
|
280 |
version_dir = os.path.join(data_dir, 'v%s' % pdbbind_v) |
|
|
281 |
if os.path.islink(version_dir): |
|
|
282 |
os.unlink(version_dir) |