oddt / Git / Diff of /tests/test

Models:

Amanda-D/

oddt

Downloads: 1

Diff of /tests/test_toolkit.py [000000] .. [3b722e]

Switch to unified view

 b/tests/test_toolkit.py
+import os
+from collections import OrderedDict, deque
+from six.moves.cPickle import loads, dumps
+import numpy as np
+import pandas as pd
+import pytest
+from numpy.testing import assert_array_equal, assert_array_almost_equal
+import oddt
+from oddt.spatial import rmsd
+from oddt.toolkits.common import canonize_ring_path
+test_data_dir = os.path.dirname(os.path.abspath(__file__))
+xiap_receptor = os.path.join(test_data_dir, 'data', 'dude', 'xiap',
+                             'receptor_rdkit.pdb')
+xiap_actives = os.path.join(test_data_dir, 'data', 'dude', 'xiap',
+                            'actives_docked.sdf')
+def test_mol():
+    """Test common molecule operations"""
+    # Hydrogen manipulation in small molecules
+    mol = oddt.toolkit.readstring('smi', 'c1ccccc1O')
+    assert len(mol.atoms) == 7
+    mol.addh()
+    assert len(mol.atoms) == 13
+    mol.removeh()
+    mol.addh(only_polar=True)
+    assert len(mol.atoms) == 8
+    mol.removeh()
+    assert len(mol.atoms) == 7
+    # Hydrogen manipulation in proteins
+    protein = next(oddt.toolkit.readfile('pdb', xiap_receptor))
+    protein.protein = True
+    res_atoms_n = [6, 10, 8, 8, 7, 11, 8, 7, 6, 8, 5, 8, 12, 9, 5, 11, 8,
+, 7, 11, 4, 7, 14, 8, 12, 6, 7, 8, 9, 9, 9, 8, 5, 11,
+, 4, 11, 12, 5, 8, 4, 9, 4, 8, 9, 7, 9, 6, 11, 10, 6,
+, 4, 4, 8, 7, 8, 14, 9, 7, 6, 9, 8, 7, 14, 9, 9, 10, 5,
+, 14, 12, 7, 4, 6, 9, 12, 8, 8, 9, 9, 9, 4, 9, 9, 12,
+, 8, 8, 8, 10, 8, 7, 10, 11, 12, 6, 7, 8, 11, 8, 9, 4,
+, 9, 7, 9, 6, 6, 4, 4, 4, 8, 7, 8, 14, 9, 7, 6, 9, 8,
+, 14, 9, 9, 10, 5, 9, 14, 12, 7, 4, 8, 10, 8, 7, 1, 1]
+    res_atoms_n_addh = [12, 17, 17, 19, 14, 23, 14, 14, 11, 17, 10, 13, 21,
+, 10, 23, 19, 20, 14, 20, 7, 14, 24, 19, 21, 11,
+, 14, 21, 16, 17, 19, 10, 23, 10, 7, 20, 21, 10,
+, 7, 16, 7, 13, 21, 16, 21, 10, 20, 17, 10, 7, 7,
+, 19, 14, 13, 24, 21, 14, 11, 16, 13, 14, 24, 16,
+, 16, 10, 21, 24, 21, 14, 7, 10, 21, 21, 19, 19,
+, 17, 21, 7, 17, 16, 21, 19, 14, 14, 19, 17, 19,
+, 18, 25, 22, 11, 17, 21, 22, 21, 17, 7, 13, 21,
+, 21, 11, 11, 7, 7, 7, 19, 14, 13, 24, 21, 14,
+, 16, 13, 14, 24, 16, 17, 16, 10, 21, 24, 21, 14,
+, 20, 17, 19, 15, 1, 1]
+    res_atoms_n_polarh = [9, 12, 9, 9, 7, 16, 11, 7, 8, 9, 6, 10, 14, 11,
+, 16, 9, 12, 9, 12, 5, 9, 16, 9, 14, 8, 8, 11,
+, 11, 12, 9, 6, 16, 6, 5, 12, 14, 6, 9, 5, 11,
+, 10, 12, 8, 12, 7, 12, 12, 7, 5, 5, 5, 9, 9,
+, 16, 12, 7, 8, 11, 10, 7, 16, 11, 12, 11, 6,
+, 16, 14, 7, 5, 7, 12, 14, 9, 9, 11, 12, 12, 5,
+, 11, 14, 9, 11, 11, 9, 12, 9, 9, 12, 17, 15,
+, 8, 10, 13, 10, 12, 5, 10, 12, 8, 12, 7, 8, 5,
+, 5, 9, 9, 10, 16, 12, 7, 8, 11, 10, 7, 16, 11,
+, 11, 6, 12, 16, 14, 7, 5, 10, 12, 9, 9, 1, 1]
+    assert len(protein.atoms) == 1114
+    assert len(protein.residues) == 138
+    assert_array_equal([len(res.atoms) for res in protein.residues],
+                       res_atoms_n)
+    protein.addh()
+    assert len(protein.atoms) == 2170
+    assert len(protein.residues) == 138
+    assert_array_equal([len(res.atoms) for res in protein.residues],
+                       res_atoms_n_addh)
+    protein.removeh()
+    protein.addh(only_polar=True)
+    assert len(protein.atoms) == 1356
+    assert len(protein.residues) == 138
+    assert_array_equal([len(res.atoms) for res in protein.residues],
+                       res_atoms_n_polarh)
+    protein.removeh()
+    assert len(protein.atoms) == 1114
+    assert len(protein.residues) == 138
+    assert_array_equal([len(res.atoms) for res in protein.residues],
+                       res_atoms_n)
+def test_mol_calccharges():
+    mol = oddt.toolkit.readstring('smi', 'c1ccccc1O')
+    mol.addh()
+    with pytest.raises(ValueError):
+        mol.calccharges('mmff94aaaaaa')
+    for m in ['gasteiger', 'mmff94']:
+        mol.calccharges(m)
+        assert (np.array(mol.charges) != 0.).any()
+    protein = next(oddt.toolkit.readfile('pdb', xiap_receptor))
+    protein.protein = True
+    # for that protein mmff94 charges could not be generated
+    with pytest.raises(Exception):
+        protein.calccharges('mmff94')
+def test_toolkit_hoh():
+    """HOH residues splitting"""
+    pdb_block = """ATOM      1  C1  GLY     1       0.000   0.000   0.000  1.00  0.00           C
+ATOM      2  C2  GLY     1       0.000   0.000   0.000  1.00  0.00           C
+ATOM      3  O1  GLY     1       0.000   0.000   0.000  1.00  0.00           O
+ATOM      4  O2  GLY     1       0.000   0.000   0.000  1.00  0.00           O
+ATOM      5  N1  GLY     1       0.000   0.000   0.000  1.00  0.00           N
+ATOM      6  O3  HOH     2       0.000   0.000   0.000  1.00  0.00           O
+ATOM      7  O4  HOH     3       0.000   0.000   0.000  1.00  0.00           O
+ATOM      8  O5  HOH     4       0.000   0.000   0.000  1.00  0.00           O
+"""
+    protein = oddt.toolkit.readstring('pdb', pdb_block)
+    protein.protein = True
+    assert len(protein.residues) == 4
+    protein.addh(only_polar=True)
+    assert len(protein.residues) == 4
+    protein.addh()
+    assert len(protein.residues) == 4
+def test_pickle():
+    """Pickle molecules"""
+    mols = list(oddt.toolkit.readfile('sdf', xiap_actives))
+    pickled_mols = list(map(lambda x: loads(dumps(x)), mols))
+    assert_array_equal(list(map(lambda x: x.title, mols)),
+                       list(map(lambda x: x.title, pickled_mols)))
+    assert_array_equal(list(map(lambda x: x.smiles, mols)),
+                       list(map(lambda x: x.smiles, pickled_mols)))
+    for mol, pickled_mol in zip(mols, pickled_mols):
+        assert dict(mol.data) == dict(pickled_mol.data)
+    # Test pickling of atom_dicts
+    assert_array_equal(list(map(lambda x: x._atom_dict is None, mols)),
+                       [True] * len(mols))
+    mols_atom_dict = np.hstack(list(map(lambda x: x.atom_dict, mols)))
+    assert_array_equal(list(map(lambda x: x._atom_dict is not None, mols)),
+                       [True] * len(mols))
+    pickled_mols = list(map(lambda x: loads(dumps(x)), mols))
+    assert_array_equal(list(map(lambda x: x._atom_dict is not None, pickled_mols)),
+                       [True] * len(mols))
+    pickled_mols_atom_dict = np.hstack(list(map(lambda x: x._atom_dict, pickled_mols)))
+    for name in mols[0].atom_dict.dtype.names:
+        if issubclass(np.dtype(mols_atom_dict[name].dtype).type, np.number):
+            assert_array_almost_equal(mols_atom_dict[name],
+                                      pickled_mols_atom_dict[name])
+        else:
+            assert_array_equal(mols_atom_dict[name],
+                               pickled_mols_atom_dict[name])
+    # Lazy Mols
+    mols = list(oddt.toolkit.readfile('sdf', xiap_actives, lazy=True))
+    pickled_mols = list(map(lambda x: loads(dumps(x)), mols))
+    assert_array_equal(list(map(lambda x: x._source is not None, pickled_mols)),
+                       [True] * len(mols))
+    assert_array_equal(list(map(lambda x: x.title, mols)),
+                       list(map(lambda x: x.title, pickled_mols)))
+    assert_array_equal(list(map(lambda x: x.smiles, mols)),
+                       list(map(lambda x: x.smiles, pickled_mols)))
+    for mol, pickled_mol in zip(mols, pickled_mols):
+        assert dict(mol.data) == dict(pickled_mol.data)
+def test_diverse_conformers():
+    # FIXME: make toolkit a module so we can import from it
+    diverse_conformers_generator = oddt.toolkit.diverse_conformers_generator
+    mol = oddt.toolkit.readstring(
+        'smi',
+        'CN1CCN(S(=O)(C2=CC=C(OCC)C(C3=NC4=C(N(C)N=C4CCC)C(N3)=O)=C2)=O)CC1'
+    )
+    mol.make3D()
+    res = []
+    for conf in diverse_conformers_generator(mol, seed=123456):
+        res.append(rmsd(mol, conf))
+    assert len(res) == 10
+    if oddt.toolkit.backend == 'ob':
+        if oddt.toolkit.__version__ < '0.3':
+            assert_array_almost_equal(res, [0., 3.043712, 3.897143, 3.289482,
+.066374, 2.909683, 2.913927,
+.488244, 3.70603, 3.597467])
+        else:
+            assert_array_almost_equal(res, [0.0, 1.372770, 2.489789, 2.759941,
+.968366, 3.228773, 3.392191,
+.921166, 3.185065, 3.283915])
+    # else:
+    #     if oddt.toolkit.__version__ > '2016.03.9':
+    #         assert_array_almost_equal(res, [1.237538, 2.346984, 0.900624,
+    #                                         3.469511, 1.886213, 2.128909,
+    #                                         2.852608, 1.312513, 1.291595,
+    #                                         1.326843])
+    #     else:
+    #         assert_array_almost_equal(res, [3.08995, 2.846358, 3.021795,
+    #                                         1.720319, 2.741972, 2.965332,
+    #                                         2.925344, 2.930157, 2.934049,
+    #                                         3.009545])
+    # check all implemented methods
+    if oddt.toolkit.backend == 'ob':
+        methods = ['ga', 'confab']
+    else:
+        methods = ['dg', 'etkdg', 'kdg', 'etdg']
+    for method in methods:
+        assert len(diverse_conformers_generator(mol,
+                                                seed=123456,
+                                                n_conf=5,
+                                                method=method)) == 5
+        assert len(diverse_conformers_generator(mol,
+                                                seed=123456,
+                                                n_conf=10,
+                                                method=method)) == 10
+        assert len(diverse_conformers_generator(mol,
+                                                seed=123456,
+                                                n_conf=20,
+                                                method=method)) == 20
+def test_indices():
+    """Test 0 and 1 based atom indices"""
+    mol = oddt.toolkit.readstring('smi', 'CCc1cc(C)c(C)cc1-c1ccc(-c2cccc(C)c2)cc1')
+    atom = mol.atoms[0]
+    assert atom.idx0 == 0
+    assert atom.idx1 == 1
+    # the unmarked index is deprecated in ODDT
+    with pytest.warns((DeprecationWarning, FutureWarning)):
+        assert atom.idx == 1
+def test_pickle_protein():
+    """Pickle proteins"""
+    # Proteins
+    rec = next(oddt.toolkit.readfile('pdb', xiap_receptor))
+    # generate atom_dict
+    assert rec.atom_dict is not None
+    assert rec._atom_dict is not None
+    pickled_rec = loads(dumps(rec))
+    assert pickled_rec.protein is False
+    assert pickled_rec._atom_dict is not None
+    rec.protein = True
+    # setting protein property should clean atom_dict cache
+    assert rec._atom_dict is None
+    # generate atom_dict
+    assert rec.atom_dict is not None
+    pickled_rec = loads(dumps(rec))
+    assert pickled_rec.protein is True
+    assert pickled_rec._atom_dict is not None
+if oddt.toolkit.backend == 'rdk':
+    def test_badmol():
+        """Propagate None's for bad molecules"""
+        mol = oddt.toolkit.readstring('smi', 'c1cc2')
+        assert mol is None
+def test_dicts():
+    """Test ODDT numpy structures, aka. dicts"""
+    mols = list(oddt.toolkit.readfile('sdf', xiap_actives))
+    list(map(lambda x: x.addh(only_polar=True), mols))
+    skip_cols = ['radius', 'charge', 'id',
+                 # following fields need to be standarized
+                 'hybridization',
+                 'numhs',
+                 'formalcharge',
+                 ]
+    all_cols = [name for name in mols[0].atom_dict.dtype.names
+                if name not in ['coords', 'neighbors', 'neighbors_id']]
+    common_cols = [name for name in all_cols if name not in skip_cols]
+    # Small molecules
+    all_dicts = np.hstack([mol.atom_dict for mol in mols])
+    all_dicts = all_dicts[all_dicts['atomicnum'] != 1]
+    data = pd.DataFrame({name: all_dicts[name] for name in all_cols})
+    data['mol_idx'] = [i
+                       for i, mol in enumerate(mols)
+                       for atom in mol
+                       if atom.atomicnum != 1]
+    # Save correct results
+    # data[common_cols].to_csv(
+    #     os.path.join(test_data_dir, 'data/results/xiap/mols_atom_dict.csv'),
+    #     index=False)
+    corr_data = pd.read_csv(os.path.join(test_data_dir, 'data', 'results',
+                                         'xiap', 'mols_atom_dict.csv')
+                            ).fillna('')
+    for name in common_cols:
+        if issubclass(np.dtype(data[name].dtype).type, np.number):
+            mask = data[name] - corr_data[name] > 1e-6
+            for i in np.argwhere(mask.values):
+                print(i, data[name][i].values, corr_data[name][i].values,
+                      mols[data['mol_idx'][int(i)]].write('smi'))
+            assert_array_almost_equal(
+                data[name],
+                corr_data[name],
+                err_msg='Mols atom_dict\'s collumn: "%s" is not equal' % name)
+        else:
+            mask = data[name] != corr_data[name]
+            for i in np.argwhere(mask.values):
+                print(i, data[name][i].values, corr_data[name][i].values,
+                      mols[data['mol_idx'][int(i)]].write('smi'))
+            assert_array_equal(
+                data[name],
+                corr_data[name],
+                err_msg='Mols atom_dict\'s collumn: "%s" is not equal' % name)
+    # Protein
+    rec = next(oddt.toolkit.readfile('pdb', xiap_receptor))
+    rec.protein = True
+    rec.addh(only_polar=True)
+    skip_cols = ['radius', 'charge', 'resid', 'id',
+                 # following fields need to be standarized
+                 'hybridization',
+                 'numhs',
+                 'formalcharge',
+                 ]
+    common_cols = [name for name in all_cols if name not in skip_cols]
+    all_dicts = rec.atom_dict[rec.atom_dict['atomicnum'] != 1]
+    data = pd.DataFrame({name: all_dicts[name] for name in all_cols})
+    # Save correct results
+    # data[common_cols].to_csv(
+    #     os.path.join(test_data_dir, 'data/results/xiap/prot_atom_dict.csv'),
+    #     index=False)
+    corr_data = pd.read_csv(os.path.join(test_data_dir, 'data', 'results',
+                                         'xiap', 'prot_atom_dict.csv')
+                            ).fillna('')
+    for name in common_cols:
+        if issubclass(np.dtype(data[name].dtype).type, np.number):
+            mask = data[name] - corr_data[name] > 1e-6
+            for i in np.argwhere(mask.values):
+                print(i,
+                      data['atomtype'][i].values,
+                      data['resname'][i].values,
+                      data[name][i].values,
+                      corr_data[name][i].values)
+            assert_array_almost_equal(
+                data[name],
+                corr_data[name],
+                err_msg='Protein atom_dict\'s collumn: "%s" is not equal' % name)
+        else:
+            mask = data[name] != corr_data[name]
+            for i in np.argwhere(mask.values):
+                print(i,
+                      data['atomtype'][i].values,
+                      data['resname'][i].values,
+                      data[name][i].values,
+                      corr_data[name][i].values)
+            assert_array_equal(
+                data[name],
+                corr_data[name],
+                err_msg='Protein atom_dict\'s collumn: "%s" is not equal' % name)
+def test_ss():
+    """Secondary structure assignment"""
+    # Alpha Helix
+    prot_file = os.path.join(test_data_dir, 'data', 'pdb', '1cos_helix.pdb')
+    protein = next(oddt.toolkit.readfile('pdb', prot_file))
+    protein.protein = True
+    # print(protein.res_dict['resname'])
+    # print(protein.res_dict['isalpha'])
+    # print(protein.res_dict['isbeta'])
+    isalpha = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+, 19, 20, 21, 22, 23, 24, 25, 26]
+    assert len(protein.res_dict) == 29
+    assert_array_equal(np.where(protein.res_dict['isalpha'])[0], isalpha)
+    assert protein.res_dict['isalpha'].sum() == 27
+    assert protein.res_dict['isbeta'].sum() == 0
+    # Beta Sheet
+    prot_file = os.path.join(test_data_dir, 'data', 'pdb', '1icl_sheet.pdb')
+    protein = next(oddt.toolkit.readfile('pdb', prot_file))
+    protein.protein = True
+    # print(protein.res_dict['resname'])
+    # print(protein.res_dict['isalpha'])
+    # print(protein.res_dict['isbeta'])
+    # print(protein.res_dict['isbeta'])
+    # for mask_group in np.split(np.argwhere(protein.res_dict['isbeta']).flatten(),
+    #                            np.argwhere(np.diff(np.argwhere(protein.res_dict['isbeta']).flatten()) != 1).flatten() + 1):
+    #         print(mask_group + 1, protein.res_dict[mask_group]['resname'])
+    isbeta = [2, 3, 4, 5, 10, 11, 12, 13]
+    assert len(protein.res_dict) == 29
+    assert_array_equal(np.where(protein.res_dict['isbeta'])[0], isbeta)
+    assert protein.res_dict['isbeta'].sum() == 8
+    assert protein.res_dict['isalpha'].sum() == 0
+    # Protein test
+    protein = next(oddt.toolkit.readfile('pdb', xiap_receptor))
+    protein.protein = True
+    # print(protein.res_dict['resname'])
+    # print(protein.res_dict['isalpha'])
+    # for mask_group in np.split(np.argwhere(protein.res_dict['isalpha']).flatten(),
+    #                            np.argwhere(np.diff(np.argwhere(protein.res_dict['isalpha']).flatten()) != 1).flatten() + 1):
+    #         print(mask_group + 1, protein.res_dict[mask_group]['resname'])
+    # print(protein.res_dict['isbeta'])
+    # for mask_group in np.split(np.argwhere(protein.res_dict['isbeta']).flatten(),
+    #                            np.argwhere(np.diff(np.argwhere(protein.res_dict['isbeta']).flatten()) != 1).flatten() + 1):
+    #         print(mask_group + 1, protein.res_dict[mask_group]['resname'])
+    isalpha = [15, 16, 17, 18, 19, 20, 28, 29, 30, 31, 32, 33, 63, 64, 65, 66,
+, 68, 69, 70, 75, 76, 77, 78, 79, 80, 83, 84, 85, 86, 87, 88,
+, 90, 91, 121, 122, 123, 124, 125, 126, 127, 128]
+    isbeta = [36, 37, 38, 45, 46, 47, 52, 53, 54]
+    assert_array_equal(np.where(protein.res_dict['isalpha'])[0], isalpha)
+    assert_array_equal(np.where(protein.res_dict['isbeta'])[0], isbeta)
+    assert len(protein.res_dict) == 136
+    assert protein.res_dict['isalpha'].sum() == 43
+    assert protein.res_dict['isbeta'].sum() == 9
+    assert (protein.res_dict['isalpha'] &
+            protein.res_dict['isbeta']).sum() == 0  # Must be zero!
+    assert (~protein.res_dict['isalpha'] &
+            ~protein.res_dict['isbeta']).sum() == 84
+def test_pdbqt():
+    """RDKit PDBQT writer and reader"""
+    mol = next(oddt.toolkit.readfile('sdf', xiap_actives))
+    mol2 = oddt.toolkit.readstring('pdbqt', mol.write('pdbqt'))
+    assert mol.title == mol2.title
+    # test loop breaks in DFS algorithm
+    mol = oddt.toolkit.readstring('smi', 'CCc1cc(C)c(C)cc1-c1ccc(-c2cccc(C)c2)cc1')
+    mol.make3D()
+    # roundtrip molecule with template
+    mol2 = oddt.toolkit.readstring('pdbqt', mol.write('pdbqt'))
+    mol.removeh()
+    assert len(mol.atoms) == len(mol2.atoms)
+    def nodes_size(block):
+        out = OrderedDict()
+        current_key = None
+        for line in block.split('\n'):
+            if line[:4] == 'ROOT' or line[:6] == 'BRANCH':
+                current_key = line.strip()
+                out[current_key] = 0
+            elif line[:4] == 'ATOM':
+                out[current_key] += 1
+        return list(out.values())
+    # check the branch order and size
+    if oddt.toolkit.backend == 'ob':
+        assert_array_equal(nodes_size(mol.write('pdbqt')),
+                           [6, 8, 2, 7])
+    else:
+        assert_array_equal(nodes_size(mol.write('pdbqt')),
+                           [8, 6, 7, 2])
+    ligand_file = os.path.join(test_data_dir, 'data', 'dude', 'xiap',
+                               'crystal_ligand.sdf')
+    mol = next(oddt.toolkit.readfile('sdf', ligand_file))
+    assert_array_equal(nodes_size(mol.write('pdbqt')),
+                       [8, 3, 6, 6, 1, 6, 3, 2, 2])
+    # roundtrip a disconnected fragments
+    mol = oddt.toolkit.readstring('smi', 'c1ccccc1.c1ccccc1C')
+    if oddt.toolkit.backend == 'ob':
+        kwargs = {'opt': {'r': None}}
+    else:
+        kwargs = {'flexible': False}
+    mol2 = oddt.toolkit.readstring('pdbqt', mol.write('pdbqt', **kwargs))
+    assert len(mol.atoms) == len(mol2.atoms)
+    mol2 = oddt.toolkit.readstring('pdbqt', mol.write('pdbqt'))
+    assert len(mol.atoms) == len(mol2.atoms)
+def test_residue_info():
+    """Residue properties"""
+    mol_file = os.path.join(test_data_dir, 'data', 'pdb', '3kwa_5Apocket.pdb')
+    mol = next(oddt.toolkit.readfile('pdb', mol_file))
+    assert len(mol.residues) == 19
+    res = mol.residues[0]
+    assert res.idx0 == 0
+    assert res.number == 92
+    assert res.chain == 'A'
+    assert res.name == 'GLN'
+def test_canonize_ring_path():
+    """Test canonic paths"""
+    path0 = list(range(6))
+    path = deque(path0)
+    path.rotate(3)
+    assert canonize_ring_path(path) == path0
+    path.reverse()
+    assert canonize_ring_path(path) == path0
+    with pytest.raises(ValueError):
+        canonize_ring_path(tuple(range(6)))