Diff of /tests/test_pandas.py [000000] .. [3b722e]

Switch to side-by-side view

--- a
+++ b/tests/test_pandas.py
@@ -0,0 +1,198 @@
+import os
+from tempfile import NamedTemporaryFile
+
+from numpy.testing import assert_array_equal
+import pandas as pd
+
+import oddt
+import oddt.pandas as opd
+
+test_data_dir = os.path.dirname(os.path.abspath(__file__))
+input_fname = os.path.join(test_data_dir, 'data/dude/xiap/actives_docked.sdf')
+
+
+def test_classes():
+    """ Test oddt.pandas classes behavior """
+    df = opd.read_sdf(input_fname)
+
+    # Check classes inheritance: the oddt containers must still be usable
+    # wherever the plain pandas ones are expected
+    assert isinstance(df, opd.ChemDataFrame)
+    assert isinstance(df, pd.DataFrame)
+    assert isinstance(df['mol'], opd.ChemSeries)
+    assert isinstance(df['mol'], pd.Series)
+
+    # Check custom metadata: it has to be carried over to a column subset
+    assert hasattr(df, '_molecule_column')
+    assert hasattr(df[['mol']], '_molecule_column')
+    assert df._molecule_column == df[['mol']]._molecule_column
+
+    # Check if slicing preserves classes
+    assert isinstance(df.head(1), opd.ChemDataFrame)
+    assert isinstance(df['mol'].head(1), opd.ChemSeries)
+
+
+def test_reading():
+    """ Test reading molecule files to ChemDataFrame """
+    # Default arguments: check dimensions of the whole file
+    full = opd.read_sdf(input_fname)
+    assert len(full) == 100
+    assert len(full.columns) == 15
+
+    # Custom name for the SMILES column
+    with_smiles = opd.read_sdf(input_fname, smiles_column='smi_col')
+    assert 'smi_col' in with_smiles.columns
+
+    # Molecule and molecule-name columns disabled, single data column kept
+    name_only = opd.read_sdf(input_fname,
+                             molecule_column=None,
+                             molecule_name_column=None,
+                             usecols=['name'])
+    for absent in ('mol', 'mol_name'):
+        assert absent not in name_only.columns
+    assert len(name_only.columns) == 1
+
+    # Column subset with the default molecule columns
+    wanted = ['name', 'uniprot_id', 'act']
+    subset = opd.read_sdf(input_fname, usecols=wanted)
+    # 3 from use_cols + 1 'mol' + 1 'mol_name'
+    assert len(subset.columns) == 5
+    assert 'uniprot_id' in subset.columns
+    assert 'smi_col' not in subset.columns
+
+    # Chunk reading: every chunk holds 10 rows and there are 10 of them
+    pieces = list(opd.read_sdf(input_fname, chunksize=10))
+    assert all(len(piece) == 10 for piece in pieces)
+    assert len(pieces) == 10
+
+    # Check dimensions of the reassembled frame
+    combined = pd.concat(pieces)
+    assert len(combined) == 100
+
+
+def test_substruct_sim_search():
+    """ Test comparison and similarity operators on a molecule column """
+    mols = opd.read_sdf(input_fname).head(10).mol
+    query = oddt.toolkit.readstring('smi', 'C(=O)(N1C[C@H](C[C@H]1C(=O)N[C@@H]1CCCc2c1cccc2)Oc1ccccc1)[C@@H](NC(=O)[C@H](C)NC)C1CCCCC1')
+
+    # `series >= query` has to give the same answer as the mirrored
+    # `query <= series`
+    expected_ge = [True, True, True, False, True,
+                   False, False, False, False, False]
+    for outcome in (mols >= query, query <= mols):
+        assert outcome.tolist() == expected_ge
+
+    # ... and likewise `series <= query` versus `query >= series`
+    expected_le = [True, True, True, True, True,
+                   True, False, False, False, True]
+    for outcome in (mols <= query, query >= mols):
+        assert outcome.tolist() == expected_le
+
+    # `|` between the fingerprints must produce a float64 result
+    similarity = mols.calcfp() | query.calcfp()
+    assert similarity.dtype == 'float64'
+
+
+def test_mol2():
+    """Writing and reading of mol2 files to/from ChemDataFrame"""
+    # The whole test is a no-op unless the 'ob' backend is active
+    if oddt.toolkit.backend != 'ob':
+        return
+
+    source = opd.read_sdf(input_fname)
+
+    # Full round trip, read back in one go and in chunks of 10
+    with NamedTemporaryFile(suffix='.mol2') as tmp:
+        source.to_mol2(tmp.name)
+        whole = opd.read_mol2(tmp.name)
+        assert source.shape == whole.shape
+
+        pieces = []
+        for piece in opd.read_mol2(tmp.name, chunksize=10):
+            assert len(piece) == 10
+            pieces.append(piece)
+        rebuilt = pd.concat(pieces)
+        assert source.shape == rebuilt.shape
+
+    # Writing a column subset: 5 columns are expected after reading back
+    with NamedTemporaryFile(suffix='.mol2') as tmp:
+        source.to_mol2(tmp.name, columns=['name', 'uniprot_id', 'act'])
+        partial = opd.read_mol2(tmp.name)
+        assert len(partial.columns) == 5
+
+
+def test_sdf():
+    """Writing ChemDataFrame to SDF molecular files"""
+    source = opd.read_sdf(input_fname)
+
+    # Full round trip: the same set of columns has to come back
+    with NamedTemporaryFile(suffix='.sdf') as tmp:
+        source.to_sdf(tmp.name)
+        restored = opd.read_sdf(tmp.name)
+    expected_columns = source.columns.sort_values()
+    assert_array_equal(expected_columns, restored.columns.sort_values())
+
+    # Writing a column subset: 5 columns are expected after reading back
+    selected = ['name', 'uniprot_id', 'act']
+    with NamedTemporaryFile(suffix='.sdf') as tmp:
+        source.to_sdf(tmp.name, columns=selected)
+        restored = opd.read_sdf(tmp.name)
+    assert len(restored.columns) == 5
+
+
+def test_csv():
+    """Writing ChemDataFrame to CSV and reading it back"""
+    frame = opd.read_sdf(
+        input_fname,
+        columns=['mol', 'name', 'chembl_id', 'dude_smiles', 'act'])
+    frame['act'] = frame['act'].astype(float)
+    frame['name'] = frame['name'].astype(int)
+
+    # All columns; each pass writes to a different kind of target (the open
+    # handle, then the path) and reads the result back through oddt
+    with NamedTemporaryFile(suffix='.csv', mode='w+') as tmp:
+        for target in (tmp, tmp.name):
+            frame.to_csv(target, index=False)
+            tmp.seek(0)  # rewind the open handle
+            restored = opd.read_csv(tmp.name,
+                                    smiles_to_molecule='mol',
+                                    molecule_column='mol')
+            assert frame.shape == restored.shape
+            assert frame.columns.tolist() == restored.columns.tolist()
+            assert frame.dtypes.tolist() == restored.dtypes.tolist()
+
+    # Column subset without molecules, read back with plain pandas
+    subset = frame[['name', 'act']]
+    with NamedTemporaryFile(suffix='.csv', mode='w+') as tmp:
+        for target in (tmp, tmp.name):
+            frame.to_csv(target, index=False, columns=['name', 'act'])
+            tmp.seek(0)  # rewind the open handle
+            restored = pd.read_csv(tmp.name)
+            assert subset.shape == restored.shape
+            assert subset.columns.tolist() == restored.columns.tolist()
+            assert subset.dtypes.tolist() == restored.dtypes.tolist()
+
+
+def test_excel():
+    """Writing ChemDataFrame to Excel files (smoke test, no assertions)"""
+    # just check if it doesn't fail
+    wanted = ['mol', 'name', 'chembl_id', 'dude_smiles', 'act']
+    # it's slow so use first 10 mols
+    frame = opd.read_sdf(input_fname, columns=wanted).head(10)
+    frame['act'] = frame['act'].astype(float)
+    frame['name'] = frame['name'].astype(int)
+
+    with NamedTemporaryFile(suffix='.xls', mode='w') as tmp:
+        # First by path ...
+        frame.to_excel(tmp.name, index=False)
+        # ... then through an explicitly created writer
+        # NOTE(review): the writer is never closed here - confirm that
+        # ChemDataFrame.to_excel saves it, otherwise nothing is written
+        xlsx_writer = pd.ExcelWriter(tmp.name, engine='xlsxwriter')
+        frame.to_excel(xlsx_writer, index=False)
+
+
+def test_chemseries_writers():
+    """Writing ChemSeries to SMILES, SDF and mol2 files"""
+    mols = opd.read_sdf(
+        input_fname,
+        columns=['mol', 'name', 'chembl_id', 'dude_smiles', 'act'])['mol']
+
+    # (file suffix, ChemSeries writer method, toolkit format used to re-read)
+    cases = [
+        ('.ism', 'to_smiles', 'smi'),   # SMILES
+        ('.sdf', 'to_sdf', 'sdf'),      # SDF
+    ]
+    # mol2 is exercised with the 'ob' backend only
+    if oddt.toolkit.backend == 'ob':
+        cases.append(('.mol2', 'to_mol2', 'mol2'))
+
+    for suffix, writer_name, fmt in cases:
+        with NamedTemporaryFile(suffix=suffix, mode='w') as tmp:
+            write = getattr(mols, writer_name)
+            write(tmp)
+            # Everything read back has to be a toolkit molecule
+            for mol in oddt.toolkit.readfile(fmt, tmp.name):
+                assert isinstance(mol, oddt.toolkit.Molecule)
+
+
+def test_ipython():
+    """iPython Notebook molecule rendering as PNG and SVG"""
+    df = opd.read_sdf(input_fname)
+    # Remember the current image backend so it can be put back afterwards
+    previous_backend = getattr(oddt.toolkit, 'image_backend', None)
+    # mock ipython
+    oddt.toolkit.ipython_notebook = True
+    try:
+        # png
+        oddt.toolkit.image_backend = 'png'
+        html = df.head(1).to_html()
+        assert '<img src="data:image/png;base64,' in html
+        # svg
+        oddt.toolkit.image_backend = 'svg'
+        html = df.head(1).to_html()
+        assert '<svg' in html
+    finally:
+        # Always undo the global toolkit changes, even when an assertion
+        # above fails, so that other tests are not affected by this one
+        oddt.toolkit.ipython_notebook = False
+        if previous_backend is not None:
+            oddt.toolkit.image_backend = previous_backend