Switch to unified view

a b/tests/test_virtualscreening.py
1
import os
2
from tempfile import mkdtemp, NamedTemporaryFile
3
4
import pytest
5
from numpy.testing import assert_array_equal, assert_array_almost_equal
6
7
import pandas as pd
8
9
import oddt
10
from oddt.utils import method_caller
11
from oddt.spatial import rmsd
12
from oddt.scoring import scorer
13
from oddt.scoring.functions import rfscore, nnscore
14
from oddt.virtualscreening import virtualscreening
15
16
test_data_dir = os.path.dirname(os.path.abspath(__file__))
17
18
# common file names
19
dude_data_dir = os.path.join(test_data_dir, 'data', 'dude', 'xiap')
20
xiap_crystal_ligand = os.path.join(dude_data_dir, 'crystal_ligand.sdf')
21
xiap_protein = os.path.join(dude_data_dir, 'receptor_rdkit.pdb')
22
xiap_actives_docked = os.path.join(dude_data_dir, 'actives_docked.sdf')
23
24
25
def test_vs_scoring_vina():
26
    """VS scoring (Vina) tests"""
27
    vs = virtualscreening(n_cpu=1)
28
    vs.load_ligands('sdf', xiap_crystal_ligand)
29
    vs.score(function='autodock_vina', protein=xiap_protein)
30
    mols = list(vs.fetch())
31
    assert len(mols) == 1
32
    mol_data = mols[0].data
33
    assert 'vina_affinity' in mol_data
34
    assert 'vina_gauss1' in mol_data
35
    assert 'vina_gauss2' in mol_data
36
    assert 'vina_hydrogen' in mol_data
37
    assert 'vina_hydrophobic' in mol_data
38
    assert 'vina_repulsion' in mol_data
39
    assert mol_data['vina_affinity'] == '-3.57594'
40
    assert mol_data['vina_gauss1'] == '63.01213'
41
    assert mol_data['vina_gauss2'] == '999.07625'
42
    assert mol_data['vina_hydrogen'] == '0.0'
43
    assert mol_data['vina_hydrophobic'] == '26.12648'
44
    assert mol_data['vina_repulsion'] == '3.63178'
45
46
47
def test_vs_docking():
48
    """VS docking (Vina) tests"""
49
    vs = virtualscreening(n_cpu=1)
50
    vs.load_ligands('sdf', xiap_crystal_ligand)
51
52
    # bad docking engine
53
    with pytest.raises(ValueError):
54
        vs.dock('srina', 'prot.pdb')
55
56
    vs.dock(engine='autodock_vina',
57
            protein=xiap_protein,
58
            auto_ligand=xiap_crystal_ligand,
59
            exhaustiveness=1,
60
            energy_range=6,
61
            num_modes=7,
62
            size=(20, 20, 20),
63
            seed=0)
64
    mols = list(vs.fetch())
65
    assert len(mols) == 7
66
    mol_data = mols[0].data
67
    assert 'vina_affinity' in mol_data
68
    assert 'vina_rmsd_lb' in mol_data
69
    assert 'vina_rmsd_ub' in mol_data
70
    vina_scores = [-6.3, -6.0, -5.8, -5.8, -3.9, -3.0, -1.1]
71
    assert_array_equal([float(m.data['vina_affinity']) for m in mols], vina_scores)
72
73
    # verify the SMILES of molecules
74
    ref_mol = next(oddt.toolkit.readfile('sdf', xiap_crystal_ligand))
75
76
    if oddt.toolkit.backend == 'ob':
77
        # OB 2.3.2 will fail the following, since Hs are removed, etc.
78
        # OB 2.4 recognizes the smiles chirality wrong
79
        pass
80
    else:
81
        vina_rmsd = [8.153314, 5.32554, 8.514586, 8.510169, 9.060128, 8.995098,
82
                     8.626776]
83
        assert_array_equal([mol.smiles for mol in mols],
84
                           [ref_mol.smiles] * len(mols))
85
86
        assert_array_almost_equal([rmsd(ref_mol, mol, method='min_symmetry')
87
                                   for mol in mols], vina_rmsd)
88
89
90
def test_vs_empty():
91
    vs = virtualscreening(n_cpu=1)
92
    with pytest.raises(StopIteration, match='no molecules loaded'):
93
        vs.fetch()
94
95
96
def test_vs_docking_empty():
97
    vs = virtualscreening(n_cpu=1)
98
    vs.load_ligands('smi', os.path.join(dude_data_dir, 'actives_rdkit.smi'))
99
100
    vs.dock(engine='autodock_vina',
101
            protein=xiap_protein,
102
            auto_ligand=xiap_crystal_ligand,
103
            exhaustiveness=1,
104
            energy_range=5,
105
            num_modes=9,
106
            size=(20, 20, 20),
107
            seed=0)
108
109
    with pytest.raises(ValueError, match='has no 3D coordinates'):
110
        next(vs.fetch())
111
112
113
def test_vs_multithreading_fallback():
114
    vs = virtualscreening(n_cpu=8)
115
    vs.load_ligands('sdf', xiap_crystal_ligand)
116
117
    vs.score(function='autodock_vina', protein=xiap_protein)
118
119
    with pytest.warns(UserWarning, match='Falling back to sub-methods multithreading'):
120
        method_caller(vs, 'fetch')
121
122
123
if oddt.toolkit.backend == 'ob':  # RDKit rewrite needed
124
    def test_vs_filtering():
125
        """VS preset filtering tests"""
126
        vs = virtualscreening(n_cpu=1)
127
128
        vs.load_ligands('sdf', xiap_actives_docked)
129
        vs.apply_filter('ro5', soft_fail=1)
130
        assert len(list(vs.fetch())) == 49
131
132
        vs.load_ligands('sdf', xiap_actives_docked)
133
        vs.apply_filter('ro3', soft_fail=2)
134
        assert len(list(vs.fetch())) == 9
135
136
137
def test_vs_pains():
138
    """VS PAINS filter tests"""
139
    vs = virtualscreening(n_cpu=1)
140
    # TODO: add some failing molecules
141
    vs.load_ligands('sdf', xiap_actives_docked)
142
    vs.apply_filter('pains', soft_fail=0)
143
    assert len(list(vs.fetch())) == 100
144
145
146
def test_vs_similarity():
147
    """VS similarity filter (USRs, IFPs) tests"""
148
    ref_mol = next(oddt.toolkit.readfile('sdf', xiap_crystal_ligand))
149
    receptor = next(oddt.toolkit.readfile('pdb', xiap_protein))
150
151
    # following toolkit differences is due to different Hs treatment
152
    vs = virtualscreening(n_cpu=1, chunksize=10)
153
    vs.load_ligands('sdf', xiap_actives_docked)
154
    vs.similarity('usr', cutoff=0.4, query=ref_mol)
155
    if oddt.toolkit.backend == 'ob':
156
        assert len(list(vs.fetch())) == 11
157
    else:
158
        assert len(list(vs.fetch())) == 6
159
160
    vs = virtualscreening(n_cpu=1)
161
    vs.load_ligands('sdf', xiap_actives_docked)
162
    vs.similarity('usr_cat', cutoff=0.3, query=ref_mol)
163
    if oddt.toolkit.backend == 'ob':
164
        assert len(list(vs.fetch())) == 16
165
    else:
166
        assert len(list(vs.fetch())) == 11
167
168
    vs = virtualscreening(n_cpu=1)
169
    vs.load_ligands('sdf', xiap_actives_docked)
170
    vs.similarity('electroshape', cutoff=0.45, query=ref_mol)
171
    if oddt.toolkit.backend == 'ob':
172
        assert len(list(vs.fetch())) == 55
173
    else:
174
        assert len(list(vs.fetch())) == 95
175
176
    vs = virtualscreening(n_cpu=1)
177
    vs.load_ligands('sdf', xiap_actives_docked)
178
    vs.similarity('ifp', cutoff=0.8, query=ref_mol, protein=receptor)
179
    assert len(list(vs.fetch())) == 33
180
181
    vs = virtualscreening(n_cpu=1)
182
    vs.load_ligands('sdf', xiap_actives_docked)
183
    vs.similarity('sifp', cutoff=0.8, query=ref_mol, protein=receptor)
184
    assert len(list(vs.fetch())) == 33
185
186
    # test wrong method error
187
    with pytest.raises(ValueError):
188
        vs.similarity('sift', query=ref_mol)
189
190
191
def test_vs_scoring():
192
    protein = next(oddt.toolkit.readfile('pdb', xiap_protein))
193
    protein.protein = True
194
195
    data_dir = os.path.join(test_data_dir, 'data')
196
    home_dir = mkdtemp()
197
    pdbbind_versions = (2007, 2013, 2016)
198
199
    pdbbind_dir = os.path.join(data_dir, 'pdbbind')
200
    for pdbbind_v in pdbbind_versions:
201
        version_dir = os.path.join(data_dir, 'v%s' % pdbbind_v)
202
        if not os.path.isdir(version_dir):
203
            os.symlink(pdbbind_dir, version_dir)
204
205
    filenames = []
206
    # train mocked SFs
207
    for model in [nnscore(n_jobs=1)] + [rfscore(version=v, n_jobs=1)
208
                                        for v in [1, 2, 3]]:
209
            model.gen_training_data(data_dir, pdbbind_versions=pdbbind_versions,
210
                                    home_dir=home_dir)
211
            filenames.append(model.train(home_dir=home_dir))
212
    vs = virtualscreening(n_cpu=-1, chunksize=10)
213
    vs.load_ligands('sdf', xiap_actives_docked)
214
    # error if no protein is fed
215
    with pytest.raises(ValueError):
216
        vs.score('nnscore')
217
    # bad sf name
218
    with pytest.raises(ValueError):
219
        vs.score('bad_sf', protein=protein)
220
    vs.score('nnscore', protein=xiap_protein)
221
    vs.score('nnscore_pdbbind2016', protein=protein)
222
    vs.score('rfscore_v1', protein=protein)
223
    vs.score('rfscore_v1_pdbbind2016', protein=protein)
224
    vs.score('rfscore_v2', protein=protein)
225
    vs.score('rfscore_v3', protein=protein)
226
    vs.score('pleclinear', protein=protein)
227
    vs.score('pleclinear_p5_l1_s65536_pdbbind2016', protein=protein)
228
    # use pickle directly
229
    vs.score(filenames[0], protein=protein)
230
    # pass SF object directly
231
    vs.score(scorer.load(filenames[0]), protein=protein)
232
    # pass wrong object (sum is not an instance of scorer)
233
    with pytest.raises(ValueError):
234
        vs.score(sum, protein=protein)
235
236
    mols = list(vs.fetch())
237
238
    assert len(mols) == 100
239
    mol_data = mols[0].data
240
    assert 'nnscore' in mol_data
241
    assert 'rfscore_v1' in mol_data
242
    assert 'rfscore_v2' in mol_data
243
    assert 'rfscore_v3' in mol_data
244
    assert 'PLEClinear_p5_l1_s65536' in mol_data
245
246
    vs = virtualscreening(n_cpu=-1, chunksize=10)
247
    vs.load_ligands('sdf', xiap_actives_docked)
248
    vs.score('nnscore', protein=protein)
249
    vs.score('rfscore_v1', protein=protein)
250
    vs.score('rfscore_v2', protein=protein)
251
    vs.score('rfscore_v3', protein=protein)
252
    with NamedTemporaryFile('w', suffix='.sdf') as molfile:
253
        with NamedTemporaryFile('w', suffix='.csv') as csvfile:
254
            vs.write('sdf', molfile.name, csv_filename=csvfile.name)
255
            data = pd.read_csv(csvfile.name)
256
            assert 'nnscore' in data.columns
257
            assert 'rfscore_v1' in data.columns
258
            assert 'rfscore_v2' in data.columns
259
            assert 'rfscore_v3' in data.columns
260
261
            mols = list(oddt.toolkit.readfile('sdf', molfile.name))
262
            assert len(mols) == 100
263
264
            vs.write_csv(csvfile.name, fields=['nnscore', 'rfscore_v1',
265
                                               'rfscore_v2', 'rfscore_v3'])
266
            data = pd.read_csv(csvfile.name)
267
            assert len(data.columns) == 4
268
            assert len(data) == len(mols)
269
            assert 'nnscore' in data.columns
270
            assert 'rfscore_v1' in data.columns
271
            assert 'rfscore_v2' in data.columns
272
            assert 'rfscore_v3' in data.columns
273
274
    # remove files
275
    for f in filenames:
276
        os.unlink(f)
277
278
    # remove symlinks
279
    for pdbbind_v in pdbbind_versions:
280
        version_dir = os.path.join(data_dir, 'v%s' % pdbbind_v)
281
        if os.path.islink(version_dir):
282
            os.unlink(version_dir)