Diff of /tests/test_rdkitfixer.py [000000] .. [3b722e]

Switch to unified view

a b/tests/test_rdkitfixer.py
1
import os
2
import tempfile
3
4
from numpy.testing import assert_array_equal, assert_almost_equal
5
import pytest
6
7
try:
8
    import rdkit
9
    from rdkit import Chem
10
except ImportError:
11
    rdkit = None
12
13
if rdkit is not None:
14
    from oddt.toolkits.extras.rdkit.fixer import (AtomListToSubMol,
15
                                                  PreparePDBMol,
16
                                                  ExtractPocketAndLigand,
17
                                                  IsResidueConnected,
18
                                                  FetchStructure,
19
                                                  PrepareComplexes)
20
21
test_data_dir = os.path.dirname(os.path.abspath(__file__))
22
test_dir = os.path.join(test_data_dir, 'data', 'pdb')
23
24
25
@pytest.mark.skipif(rdkit is None, reason="RDKit required")
26
def test_atom_list_to_submol():
27
    mol = Chem.MolFromSmiles('CCCCC(=O)O')
28
    submol = AtomListToSubMol(mol, range(3, 7))
29
    assert submol.GetNumAtoms() == 4
30
    assert submol.GetNumAtoms() == 4
31
    assert submol.GetNumBonds() == 3
32
    assert submol.GetBondBetweenAtoms(1, 2).GetBondType() == rdkit.Chem.rdchem.BondType.DOUBLE
33
34
    molfile = os.path.join(test_dir, '2qwe_Sbridge.pdb')
35
    mol = Chem.MolFromPDBFile(molfile, sanitize=False, removeHs=False)
36
    assert mol.GetConformer().Is3D()
37
    submol = AtomListToSubMol(mol, range(6), includeConformer=True)
38
    assert submol.GetConformer().Is3D()
39
40
    # submol has residue info
41
    atom = submol.GetAtomWithIdx(0)
42
    info = atom.GetPDBResidueInfo()
43
    assert info.GetResidueName() == 'CYS'
44
    assert info.GetResidueNumber() == 92
45
46
    # test multiple conformers
47
    mol.AddConformer(mol.GetConformer())
48
    assert mol.GetNumConformers() == 2
49
    submol = AtomListToSubMol(mol, range(6), includeConformer=True)
50
    assert submol.GetNumConformers() == 2
51
52
    # FIXME: Newer RDKit has GetPositions, 2016.03 does not
53
    mol_conf = mol.GetConformer()
54
    submol_conf = submol.GetConformer()
55
    assert_array_equal([submol_conf.GetAtomPosition(i)
56
                        for i in range(submol_conf.GetNumAtoms())],
57
                       [mol_conf.GetAtomPosition(i) for i in range(6)])
58
59
    submol2 = AtomListToSubMol(submol, range(3), includeConformer=True)
60
    submol2_conf = submol2.GetConformer()
61
    assert submol2.GetNumConformers() == 2
62
    assert_array_equal([submol2_conf.GetAtomPosition(i)
63
                        for i in range(submol2_conf.GetNumAtoms())],
64
                       [mol_conf.GetAtomPosition(i) for i in range(3)])
65
66
67
@pytest.mark.skipif(rdkit is None or rdkit.__version__ < '2017.03', reason="RDKit required")
68
def test_multivalent_Hs():
69
    """Test if fixer deals with multivalent Hs"""
70
71
    # TODO: require mol without Hs in the future (rdkit v. 2018)
72
    molfile = os.path.join(test_dir, '2c92_hypervalentH.pdb')
73
    mol = Chem.MolFromPDBFile(molfile, sanitize=False, removeHs=False)
74
    mol = PreparePDBMol(mol, residue_whitelist=[], removeHs=False)
75
76
    atom = mol.GetAtomWithIdx(84)
77
    assert atom.GetAtomicNum() == 1  # is it H
78
    assert atom.GetDegree() == 1  # H should have 1 bond
79
80
    for n in atom.GetNeighbors():  # Check if neighbor is from the same residue
81
        assert atom.GetPDBResidueInfo().GetResidueName() == n.GetPDBResidueInfo().GetResidueName()
82
83
    # mol can be sanitized
84
    assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE
85
86
87
@pytest.mark.skipif(rdkit is None, reason="RDKit required")
88
def test_HOH_bonding():
89
    """Test if fixer unbinds HOH"""
90
91
    molfile = os.path.join(test_dir, '2vnf_bindedHOH.pdb')
92
    mol = Chem.MolFromPDBFile(molfile, sanitize=False, removeHs=False)
93
    # don't use templates and don't remove waters
94
    mol = PreparePDBMol(mol, removeHOHs=False)
95
96
    atom = mol.GetAtomWithIdx(5)
97
    assert atom.GetPDBResidueInfo().GetResidueName() == 'HOH'
98
    assert atom.GetDegree() == 0  # HOH should have no bonds
99
100
    # mol can be sanitized
101
    assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE
102
103
104
@pytest.mark.skipif(rdkit is None, reason="RDKit required")
105
def test_metal_bonding():
106
    """Test if fixer disconnects metals"""
107
108
    molfile = os.path.join(test_dir, '1ps3_zn.pdb')
109
    mol = Chem.MolFromPDBFile(molfile, sanitize=False, removeHs=False)
110
111
    mol = PreparePDBMol(mol)
112
113
    atom = mol.GetAtomWithIdx(36)
114
    assert atom.GetAtomicNum() == 30  # is it Zn
115
    assert atom.GetDegree() == 0  # Zn should have no bonds
116
    assert atom.GetFormalCharge() == 2
117
    assert atom.GetNumExplicitHs() == 0
118
119
    # mol can be sanitized
120
    assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE
121
122
123
@pytest.mark.skipif(rdkit is None, reason="RDKit required")
124
def test_interresidue_bonding():
125
    """Test if fixer removes wrong connections between residues"""
126
127
    molfile = os.path.join(test_dir, '4e6d_residues.pdb')
128
    mol = Chem.MolFromPDBFile(molfile, sanitize=False, removeHs=False)
129
130
    mol = PreparePDBMol(mol)
131
132
    # check if O from PRO
133
    atom1 = mol.GetAtomWithIdx(11)
134
    assert atom1.GetAtomicNum() == 8
135
    assert atom1.GetPDBResidueInfo().GetResidueName() == 'PRO'
136
    # ...and N from GLN
137
    atom2 = mol.GetAtomWithIdx(22)
138
    assert atom2.GetAtomicNum() == 7
139
    assert atom2.GetPDBResidueInfo().GetResidueName() == 'GLN'
140
    # ...are not connected
141
    assert mol.GetBondBetweenAtoms(11, 22) is None
142
143
    # mol can be sanitized
144
    assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE
145
146
147
@pytest.mark.skipif(rdkit is None, reason="RDKit required")
148
def test_intraresidue_bonding():
149
    """Test if fixer removes wrong connections within single residue"""
150
151
    molfile = os.path.join(test_dir, '1idg_connectivity.pdb')
152
    mol = Chem.MolFromPDBFile(molfile, sanitize=False, removeHs=False)
153
    mol = PreparePDBMol(mol)
154
155
    # check if N and C from GLU20 are not connected
156
    atom1 = mol.GetAtomWithIdx(11)
157
    assert atom1.GetAtomicNum() == 7
158
    assert atom1.GetPDBResidueInfo().GetResidueName() == 'GLU'
159
    assert atom1.GetPDBResidueInfo().GetResidueNumber() == 20
160
    atom2 = mol.GetAtomWithIdx(13)
161
    assert atom2.GetAtomicNum() == 6
162
    assert atom2.GetPDBResidueInfo().GetResidueName() == 'GLU'
163
    assert atom2.GetPDBResidueInfo().GetResidueNumber() == 20
164
165
    assert mol.GetBondBetweenAtoms(11, 13) is None
166
167
    # mol can be sanitized
168
    assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE
169
170
171
@pytest.mark.skipif(rdkit is None, reason="RDKit required")
172
def test_bondtype():
173
    """Test if fixer deals with non-standard residue and fixes bond types"""
174
175
    molfile = os.path.join(test_dir, '3rsb_bondtype.pdb')
176
    mol = Chem.MolFromPDBFile(molfile, sanitize=False, removeHs=False)
177
    mol = PreparePDBMol(mol)
178
179
    # check if there is double bond between N and C from MSE
180
    atom1 = mol.GetAtomWithIdx(13)
181
    assert atom1.GetAtomicNum() == 6
182
    assert atom1.GetPDBResidueInfo().GetResidueName() == 'MSE'
183
    atom2 = mol.GetAtomWithIdx(14)
184
    assert atom2.GetAtomicNum() == 8
185
    assert atom2.GetPDBResidueInfo().GetResidueName() == 'MSE'
186
187
    # there is a bond and it is double
188
    bond = mol.GetBondBetweenAtoms(13, 14)
189
    assert bond is not None
190
    assert_almost_equal(bond.GetBondTypeAsDouble(), 2.0)
191
192
    # mol can be sanitized
193
    assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE
194
195
196
@pytest.mark.skipif(rdkit is None, reason="RDKit required")
197
def test_ring():
198
    """Test if fixer adds missing bond in ring"""
199
200
    molfile = os.path.join(test_dir, '4yzm_ring.pdb')
201
    mol = Chem.MolFromPDBFile(molfile, sanitize=False, removeHs=False)
202
    mol = PreparePDBMol(mol)
203
204
    # check if there is double bond between N and C from MSE
205
    atom1 = mol.GetAtomWithIdx(12)
206
    assert atom1.GetAtomicNum() == 6
207
    assert atom1.GetPDBResidueInfo().GetResidueName() == 'PHE'
208
    atom2 = mol.GetAtomWithIdx(13)
209
    assert atom2.GetAtomicNum() == 6
210
    assert atom2.GetPDBResidueInfo().GetResidueName() == 'PHE'
211
212
    # there is a bond and it is aromatic
213
    bond = mol.GetBondBetweenAtoms(12, 13)
214
    assert bond is not None
215
    assert_almost_equal(bond.GetBondTypeAsDouble(), 1.5)
216
217
    # mol can be sanitized
218
    assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE
219
220
221
@pytest.mark.skipif(rdkit is None, reason="RDKit required")
222
def test_sulphur_bridge():
223
    """Test sulphur bridges retention"""
224
225
    molfile = os.path.join(test_dir, '2qwe_Sbridge.pdb')
226
    mol = Chem.MolFromPDBFile(molfile, sanitize=False, removeHs=False)
227
228
    mol = PreparePDBMol(mol)
229
230
    atom1 = mol.GetAtomWithIdx(5)
231
    atom2 = mol.GetAtomWithIdx(11)
232
    bond = mol.GetBondBetweenAtoms(atom1.GetIdx(), atom2.GetIdx())
233
    assert atom1.GetPDBResidueInfo().GetName().strip() == 'SG'
234
    assert atom1.GetPDBResidueInfo().GetResidueNumber() == 92
235
    assert atom2.GetPDBResidueInfo().GetName().strip() == 'SG'
236
    assert atom2.GetPDBResidueInfo().GetResidueNumber() == 417
237
    assert bond is not None
238
239
240
@pytest.mark.skipif(rdkit is None, reason="RDKit required")
241
def test_pocket_extractor():
242
    """Test extracting pocket and ligand"""
243
244
    molfile = os.path.join(test_dir, '5ar7.pdb')
245
    mol = Chem.MolFromPDBFile(molfile, sanitize=False, removeHs=False)
246
247
    # there should be no pocket at 1A
248
    pocket, ligand = ExtractPocketAndLigand(mol, cutoff=1.)
249
    assert pocket.GetNumAtoms() == 0
250
    assert ligand.GetNumAtoms() == 26
251
252
    # small pocket of 5A
253
    pocket, ligand = ExtractPocketAndLigand(mol, cutoff=12.)
254
    assert pocket.GetNumAtoms() == 928
255
    assert ligand.GetNumAtoms() == 26
256
257
    # check if HOH is in pocket
258
    atom = pocket.GetAtomWithIdx(910)
259
    assert atom.GetAtomicNum() == 8
260
    assert atom.GetPDBResidueInfo().GetResidueName() == 'HOH'
261
262
    # Prepare and sanitize pocket and ligand
263
    pocket = PreparePDBMol(pocket)
264
    ligand = PreparePDBMol(ligand)
265
    assert Chem.SanitizeMol(pocket) == Chem.SanitizeFlags.SANITIZE_NONE
266
    assert Chem.SanitizeMol(ligand) == Chem.SanitizeFlags.SANITIZE_NONE
267
268
    # Check atom/bond properies for both molecules
269
    bond = pocket.GetBondWithIdx(39)
270
    assert bond.GetIsAromatic()
271
    assert bond.GetBeginAtom().GetPDBResidueInfo().GetResidueName() == 'TYR'
272
273
    atom = ligand.GetAtomWithIdx(22)
274
    assert atom.GetAtomicNum() == 7
275
    assert atom.GetIsAromatic()
276
    assert atom.GetPDBResidueInfo().GetResidueName() == 'SR8'
277
278
    # test if metal is in pocket
279
    molfile = os.path.join(test_dir, '4p6p_lig_zn.pdb')
280
    mol = Chem.MolFromPDBFile(molfile, sanitize=False, removeHs=False)
281
    assert mol.GetNumAtoms() == 176
282
    pocket, ligand = ExtractPocketAndLigand(mol, cutoff=5.)
283
    assert pocket.GetNumAtoms() == 162
284
    assert ligand.GetNumAtoms() == 14
285
286
    atom = pocket.GetAtomWithIdx(153)
287
    assert atom.GetPDBResidueInfo().GetResidueName().strip() == 'ZN'
288
    atom = pocket.GetAtomWithIdx(160)
289
    assert atom.GetPDBResidueInfo().GetResidueName() == 'HOH'
290
291
    pocket, ligand = ExtractPocketAndLigand(mol, cutoff=5., expandResidues=False)
292
    assert pocket.GetNumAtoms() == 74
293
    assert ligand.GetNumAtoms() == 14
294
295
    atom = pocket.GetAtomWithIdx(65)
296
    assert atom.GetPDBResidueInfo().GetResidueName().strip() == 'ZN'
297
    atom = pocket.GetAtomWithIdx(73)
298
    assert atom.GetPDBResidueInfo().GetResidueName() == 'HOH'
299
300
    # ligand and protein white/blacklist
301
    molfile = os.path.join(test_dir, '1dy3_2LIG.pdb')
302
    mol = Chem.MolFromPDBFile(molfile, sanitize=False, removeHs=False)
303
304
    # by default the largest ligand - ATP
305
    pocket, ligand = ExtractPocketAndLigand(mol, cutoff=20.)
306
    assert pocket.GetNumAtoms() == 304
307
    assert ligand.GetNumAtoms() == 31
308
309
    atom = ligand.GetAtomWithIdx(0)
310
    assert atom.GetPDBResidueInfo().GetResidueName() == 'ATP'
311
312
    # blacklist APT to get other largest ligand - 87Y
313
    pocket, ligand = ExtractPocketAndLigand(mol, cutoff=20.,
314
                                            ligand_residue_blacklist=['ATP'])
315
    assert pocket.GetNumAtoms() == 304
316
    assert ligand.GetNumAtoms() == 23
317
318
    atom = ligand.GetAtomWithIdx(0)
319
    assert atom.GetPDBResidueInfo().GetResidueName() == '87Y'
320
321
    # point to 87Y explicitly
322
    pocket, ligand = ExtractPocketAndLigand(mol, cutoff=20.,
323
                                            ligand_residue='87Y')
324
    assert pocket.GetNumAtoms() == 304
325
    assert ligand.GetNumAtoms() == 23
326
327
    atom = ligand.GetAtomWithIdx(0)
328
    assert atom.GetPDBResidueInfo().GetResidueName() == '87Y'
329
330
    # include APT in pocket to get other largest ligand - 87Y
331
    pocket, ligand = ExtractPocketAndLigand(mol, cutoff=20.,
332
                                            append_residues=['ATP'])
333
    assert pocket.GetNumAtoms() == 304+31
334
    assert ligand.GetNumAtoms() == 23
335
336
    atom = ligand.GetAtomWithIdx(0)
337
    assert atom.GetPDBResidueInfo().GetResidueName() == '87Y'
338
339
    atom = pocket.GetAtomWithIdx(310)
340
    assert atom.GetPDBResidueInfo().GetResidueName() == 'ATP'
341
342
343
@pytest.mark.skipif(rdkit is None, reason="RDKit required")
344
def test_aromatic_ring():
345
    """Test aromaticity for partial matches"""
346
347
    # ring is complete and should be aromatic
348
    molfile = os.path.join(test_dir, '5ar7_HIS.pdb')
349
    mol = Chem.MolFromPDBFile(molfile, sanitize=False, removeHs=False)
350
    mol = PreparePDBMol(mol)
351
352
    atom = mol.GetAtomWithIdx(6)
353
    assert atom.GetAtomicNum() == 7
354
    info = atom.GetPDBResidueInfo()
355
    assert info.GetResidueName() == 'HIS'
356
    assert info.GetResidueNumber() == 246
357
    assert info.GetName().strip() == 'ND1'
358
    assert atom.GetIsAromatic()
359
360
    atom = mol.GetAtomWithIdx(9)
361
    assert atom.GetAtomicNum() == 7
362
    info = atom.GetPDBResidueInfo()
363
    assert info.GetResidueName() == 'HIS'
364
    assert info.GetResidueNumber() == 246
365
    assert info.GetName().strip() == 'NE2'
366
    assert atom.GetIsAromatic()
367
368
    assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE
369
370
    # there is only one atom from the ring and it shouldn't be aromatic
371
    molfile = os.path.join(test_dir, '3cx9_TYR.pdb')
372
    mol = Chem.MolFromPDBFile(molfile, sanitize=False, removeHs=False)
373
    mol = PreparePDBMol(mol)
374
375
    atom = mol.GetAtomWithIdx(14)
376
    assert atom.GetAtomicNum() == 6
377
    info = atom.GetPDBResidueInfo()
378
    assert info.GetResidueName() == 'TYR'
379
    assert info.GetResidueNumber() == 138
380
    assert info.GetName().strip() == 'CG'
381
    assert not atom.GetIsAromatic()
382
    assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE
383
384
385
@pytest.mark.skipif(rdkit is None, reason="RDKit required")
386
def test_many_missing():
387
    """Test parsing residues with **many** missing atoms and bonds"""
388
389
    molfile = os.path.join(test_dir, '2wb5_GLN.pdb')
390
    mol = Chem.MolFromPDBFile(molfile, sanitize=False, removeHs=False)
391
    mol = PreparePDBMol(mol)
392
393
    assert mol.GetNumAtoms() == 5
394
    assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE
395
396
    assert mol.GetAtomWithIdx(4).GetDegree() == 0
397
398
    # test if removal works
399
    mol = Chem.MolFromPDBFile(molfile, sanitize=False, removeHs=False)
400
    mol = PreparePDBMol(mol, remove_incomplete=True)
401
402
    assert mol.GetNumAtoms() == 0
403
    assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE
404
405
406
@pytest.mark.skipif(rdkit is None, reason="RDKit required")
407
def test_remove_incomplete():
408
    """Test removing residues with missing atoms"""
409
410
    molfile = os.path.join(test_dir, '3cx9_TYR.pdb')
411
    mol = Chem.MolFromPDBFile(molfile, sanitize=False, removeHs=False)
412
413
    # keep all residues
414
    new_mol = PreparePDBMol(mol, remove_incomplete=False)
415
    assert new_mol.GetNumAtoms() == 23
416
    residues = set()
417
    for atom in new_mol.GetAtoms():
418
        residues.add(atom.GetPDBResidueInfo().GetResidueNumber())
419
    assert residues, {137, 138 == 139}
420
    assert Chem.SanitizeMol(new_mol) == Chem.SanitizeFlags.SANITIZE_NONE
421
422
    # remove residue with missing sidechain
423
    new_mol = PreparePDBMol(mol, remove_incomplete=True)
424
    assert new_mol.GetNumAtoms() == 17
425
    residues = set()
426
    for atom in new_mol.GetAtoms():
427
        residues.add(atom.GetPDBResidueInfo().GetResidueNumber())
428
    assert residues, {137 == 139}
429
    assert Chem.SanitizeMol(new_mol) == Chem.SanitizeFlags.SANITIZE_NONE
430
431
432
@pytest.mark.skipif(rdkit is None, reason="RDKit required")
433
def test_custom_templates():
434
    """Test using custom templates"""
435
436
    molfile = os.path.join(test_dir, '3cx9_TYR.pdb')
437
    mol = Chem.MolFromPDBFile(molfile, sanitize=False, removeHs=False)
438
439
    templates = {
440
        'TYR': 'CCC(N)C=O',
441
        'LYS': 'NC(C(O)=O)CCCCN',
442
        'LEU': 'CC(C)CC(N)C(=O)O',
443
    }
444
445
    mol_templates = {resname: Chem.MolFromSmiles(smi)
446
                     for resname, smi in templates.items()}
447
448
    for kwargs in ({'custom_templates': {'TYR': 'CCC(N)C=O'}},
449
                   {'custom_templates': {'TYR': Chem.MolFromSmiles('CCC(N)C=O')}},
450
                   {'custom_templates': templates, 'replace_default_templates': True},
451
                   {'custom_templates': mol_templates, 'replace_default_templates': True}):
452
453
        # use TYR without sidechain - all matches should be complete
454
        new_mol = PreparePDBMol(mol, remove_incomplete=True, **kwargs)
455
        assert new_mol.GetNumAtoms() == 23
456
        residues = set()
457
        for atom in new_mol.GetAtoms():
458
            residues.add(atom.GetPDBResidueInfo().GetResidueNumber())
459
        assert residues, {137, 138 == 139}
460
        assert Chem.SanitizeMol(new_mol) == Chem.SanitizeFlags.SANITIZE_NONE
461
462
463
@pytest.mark.skipif(rdkit is None, reason="RDKit required")
464
def test_add_missing_atoms():
465
    # add missing atom at tryptophan
466
    molfile = os.path.join(test_dir, '5dhh_missingatomTRP.pdb')
467
    mol = Chem.MolFromPDBFile(molfile, sanitize=True)
468
    mol = Chem.RemoveHs(mol, sanitize=False)
469
470
    assert mol.GetNumAtoms() == 26
471
    mol = PreparePDBMol(mol, add_missing_atoms=True)
472
    assert mol.GetNumAtoms() == 27
473
474
    atom = mol.GetAtomWithIdx(21)
475
    assert atom.GetAtomicNum() == 6
476
    info = atom.GetPDBResidueInfo()
477
    assert info.GetResidueName() == 'TRP'
478
    assert info.GetResidueNumber() == 175
479
    assert info.GetName().strip() == 'C9'
480
    assert atom.IsInRing()
481
    assert atom.GetIsAromatic()
482
    assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE
483
484
    # add whole ring to tyrosine
485
    molfile = os.path.join(test_dir, '3cx9_TYR.pdb')
486
    mol = Chem.MolFromPDBFile(molfile, sanitize=True)
487
    mol = Chem.RemoveHs(mol, sanitize=False)
488
489
    assert mol.GetNumAtoms() == 23
490
    mol = PreparePDBMol(mol, add_missing_atoms=True)
491
    assert mol.GetNumAtoms() == 29
492
493
    atom = mol.GetAtomWithIdx(17)
494
    assert atom.GetAtomicNum() == 6
495
    info = atom.GetPDBResidueInfo()
496
    assert info.GetResidueName() == 'TYR'
497
    assert info.GetResidueNumber() == 138
498
    assert info.GetName().strip() == 'C6'
499
    assert atom.IsInRing()
500
    assert atom.GetIsAromatic()
501
    assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE
502
503
    # missing protein backbone atoms
504
    molfile = os.path.join(test_dir, '5ar7_HIS.pdb')
505
    mol = Chem.MolFromPDBFile(molfile, sanitize=False)
506
    mol = Chem.RemoveHs(mol, sanitize=False)
507
508
    assert mol.GetNumAtoms() == 21
509
    assert mol.GetNumBonds() == 19
510
    mol = PreparePDBMol(mol, add_missing_atoms=True)
511
    assert mol.GetNumAtoms() == 25
512
    assert mol.GetNumBonds() == 25
513
514
    # missing nucleotide backbone atoms
515
    molfile = os.path.join(test_dir, '1bpx_missingBase.pdb')
516
    mol = Chem.MolFromPDBFile(molfile, sanitize=False)
517
    mol = Chem.RemoveHs(mol, sanitize=False)
518
519
    assert mol.GetNumAtoms() == 301
520
    assert mol.GetNumBonds() == 333
521
    mol = PreparePDBMol(mol, add_missing_atoms=True)
522
    assert mol.GetNumAtoms() == 328
523
    assert mol.GetNumBonds() == 366
524
525
526
@pytest.mark.skipif(rdkit is None, reason="RDKit required")
527
def test_connected_residues():
528
    molfile = os.path.join(test_dir, '4p6p_lig_zn.pdb')
529
    mol = Chem.MolFromPDBFile(molfile, sanitize=False, removeHs=False)
530
    mol = PreparePDBMol(mol)    # we need to use fixer with rdkit < 2018
531
532
    # residue which has neighbours
533
    assert IsResidueConnected(mol, range(120, 127))
534
535
    # ligand
536
    assert not IsResidueConnected(mol, range(153, 167))
537
538
    # fragments of two residues
539
    with pytest.raises(ValueError):
540
        IsResidueConnected(mol, range(5, 15))
541
542
543
@pytest.mark.skipif(rdkit is None, reason="RDKit required")
544
def test_fetch_structures():
545
    pdbid = '3ws8'
546
    tmpdir = tempfile.mkdtemp()
547
548
    mol1 = FetchStructure(pdbid)
549
    mol2 = FetchStructure(pdbid, cache_dir=tmpdir)
550
    mol3 = FetchStructure(pdbid, cache_dir=tmpdir)
551
    assert mol1.GetNumAtoms() == mol2.GetNumAtoms()
552
    assert mol1.GetNumAtoms() == mol3.GetNumAtoms()
553
554
555
@pytest.mark.skipif(rdkit is None, reason="RDKit required")
556
def test_prepare_complexes():
557
    ids = [
558
        '3WS9',    # simple case with everything fine
559
        '3HLJ',    # ligand not in report
560
        '3BYM',    # non-existing ligand and backbone residue in report
561
        '2PIN',    # two ligands with binding affinities
562
        '3CYU',    # can't parse ligands properly
563
        '1A28',    # multiple affinity types
564
    ]
565
566
    tmpdir = tempfile.mkdtemp()
567
    complexes = PrepareComplexes(ids, cache_dir=tmpdir)
568
    expected_values = {
569
        '3WS9': {'X4D': {'IC50': 92.0}},
570
        '3BYM': {'AM0': {'IC50': 6.0}},
571
        '2PIN': {'LEG': {'IC50': 1500.0}},
572
        '3CYU': {'0CR': {'Kd': 60.0}},
573
        '1A28': {'STR': {'Ki': 5.1}},
574
    }
575
576
    values = {}
577
    for pdbid, pairs in complexes.items():
578
        values[pdbid] = {}
579
        for resname, (_, ligand) in pairs.items():
580
            values[pdbid][resname] = {k: float(v) for k, v
581
                                      in ligand.GetPropsAsDict().items()}
582
583
    assert expected_values.keys() == values.keys()
584
585
    for pdbid in expected_values:
586
        assert values[pdbid].keys() == expected_values[pdbid].keys()
587
        for resname in values[pdbid]:
588
            assert values[pdbid][resname].keys() == expected_values[pdbid][resname].keys()
589
            for key, val in values[pdbid][resname].items():
590
                assert key in expected_values[pdbid][resname]
591
                assert_almost_equal(expected_values[pdbid][resname][key], val)
592
    for idx in expected_values:
593
        assert os.path.exists(os.path.join(tmpdir, idx,
594
                                           '%s.pdb' % idx))