--- /dev/null
+++ b/openomics/database/interaction.py
@@ -0,0 +1,1293 @@
+import copy
+import os
+from abc import abstractmethod
+from collections.abc import Iterable
+from typing import List, Dict, Any, Union, Optional
+
+import dask.dataframe as dd
+import networkx as nx
+import pandas as pd
+import scipy.sparse as ssp
+from Bio import SeqIO
+from logzero import logger
+from pandas.core.dtypes.common import is_numeric_dtype
+
+from openomics.database.base import Database
+from openomics.database.sequence import SequenceDatabase, UniProt
+from openomics.transforms.df import filter_rows
+
+__all__ = ['STRING', 'GeneMania', 'IntAct', 'BioGRID', 'MiRTarBase', 'LncBase', 'TargetScan', 'TarBase',
+           'LncReg', 'lncRInter', 'LncRNA2Target', 'lncRNome', 'NPInter', 'RNAInter', 'StarBase']
+
+
+class Interactions(Database):
+    edges: Optional[Union[pd.DataFrame, dd.DataFrame]]
+
+    def __init__(self, path, file_resources: Dict, source_col_name: str = None, target_col_name: str = None,
+                 edge_attr: List[str] = None, filters: Union[str, Dict[str, Union[str, List[str]]]] = None,
+                 directed: bool = True, relabel_nodes: dict = None, blocksize=None, **kwargs):
+        """
+        An abstract class used to instantiate an interaction database given a folder containing various file
+        resources. When a subclass is created, the load_data function is called, where the file resources are
+        loaded as a DataFrame and the necessary processing is performed. This class provides an interface for
+        RNA classes to annotate various genomic annotations, functional annotations, sequences, and disease
+        associations.
+
+        Args:
+            path (str):
+                The folder path containing the data files.
+            file_resources (dict):
+                Default None, used to list required files for load_network() of the dataset. A dictionary where
+                keys are required filenames and values are file paths. If None, then the class constructor
+                should automatically build the required file resources dict.
+            source_col_name (str):
+                Column name of the DataFrame to be used as the source node names.
+            target_col_name (str):
+                Column name of the DataFrame to be used as the target node names.
+            edge_attr (list):
+                A list of column names to be included as attributes for each edge (source-target pair).
+            filters (dict):
+                Optional. A dict whose keys match columns of the data table (from load_network()) and whose
+                values select the rows to keep on that column.
+            directed (bool): default True,
+                Whether to create a directed or an undirected network.
+            relabel_nodes (dict): default None,
+                A dictionary to rename nodes in the network, where nodes named <dict[key]> will be renamed to
+                <dict[value]>.
+            blocksize ():
+        """
+        self.filters = filters
+        self.source_col_name = source_col_name
+        self.target_col_name = target_col_name
+        self.directed = directed
+        self.edge_attr = edge_attr
+
+        super().__init__(path=path, file_resources=file_resources, blocksize=blocksize, **kwargs)
+        self.network = self.load_network(file_resources=self.file_resources, source_col_name=source_col_name,
+                                         target_col_name=target_col_name, edge_attr=edge_attr, directed=directed,
+                                         filters=filters, blocksize=blocksize)
+
+        if relabel_nodes is not None:
+            self.network = nx.relabel_nodes(self.network, mapping=relabel_nodes)
+
+        self.close()
+
+    @classmethod
+    def name(cls):
+        return cls.__name__
+
+    @abstractmethod
+    def load_network(self, file_resources: Dict, source_col_name: str, target_col_name: str,
+                     edge_attr: Union[str, List[str]], directed: bool, filters: Dict[str, Any], blocksize=None) \
+            -> nx.Graph:
+        """
+        Handles data processing from `file_resources` to a Pandas DataFrame containing edgelist data, then
+        constructs and returns a NetworkX Graph.
+
+        Args:
+            file_resources: a dict of file name and file path/object
+            source_col_name (str): column name of the dataframe for the source node in each edge
+            target_col_name (str): column name of the dataframe for the target node in each edge
+            edge_attr (list): list of str for column data to include in each edge
+            directed (bool): True to return a DiGraph(), else Graph()
+            filters: a dict of {column name: column values} to filter the dataframe
+            blocksize ():
+        Returns:
+            network: a NetworkX Graph or DiGraph
+        """
+        raise NotImplementedError
+
+    def get_interactions(self, nodelist=None, data=False, inclusive=True, relabel_nodes: Dict[str, str] = None):
+        """Retrieve edges from the network, optionally restricted to `nodelist`.
+
+        Args:
+            nodelist (list):
+                A list of nodes to fetch edges from.
+            data (bool): default False
+                Whether to include edge attributes.
+            inclusive (bool): default True
+                Whether to only retrieve edges whose source and target are both in `nodelist`, rather than all
+                edges incident to nodes in `nodelist`.
+            relabel_nodes (dict): default None
+                A mapping to rename nodes in the returned edge view.
+
+        Returns:
+            edges (OutEdgeView): a NetworkX edgelist
+        """
+        if not hasattr(self, "network"):
+            raise Exception(
+                "{} does not have network interaction data yet. Must run load_network() and assign the "
+                "self.network field first.".format(self.name()))
+
+        g = self.network
+        if relabel_nodes:
+            g = nx.relabel_nodes(g, relabel_nodes, copy=False)
+
+        if nodelist is None:
+            return g.edges(data=data)
+
+        if inclusive:
+            return g.subgraph(nodelist).edges(data=data)
+        else:
+            return g.edges(nbunch=nodelist, data=data)
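+
+# Illustrative usage sketch for the query API above: once a concrete subclass (e.g. STRING below) has
+# built self.network, get_interactions() returns a NetworkX edge view. The protein IDs here are
+# hypothetical placeholders.
+#
+#     db = STRING(species_id="9606", version="v11.0")
+#     edges = db.get_interactions(nodelist=["9606.ENSP00000269305", "9606.ENSP00000354587"],
+#                                 data=True, inclusive=True)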
+
+
+class STRING(Interactions, SequenceDatabase):
+    """Loads the STRING database from https://string-db.org/ .
+
+    Default path: "https://stringdb-static.org/download/" .
+    Default file_resources: {
+        "{species_id}.protein.info.txt.gz": f"protein.info.{version}/{species_id}.protein.info.{version}.txt.gz",
+        "{species_id}.protein.aliases.txt.gz": f"protein.aliases.{version}/{species_id}.protein.aliases.{version}.txt.gz",
+        "{species_id}.protein.links.txt.gz": f"protein.links.{version}/{species_id}.protein.links.{version}.txt.gz",
+        "{species_id}.protein.sequences.fa.gz": f"protein.sequences.{version}/{species_id}.protein.sequences.{version}.fa.gz"
+    }
+
+    Edge attributes for protein.actions.txt include ["mode", "action", "is_directional", "a_is_acting", "score"].
+    Edge attributes for protein.links.txt include ["combined_score"].
+    """
+    COLUMNS_RENAME_DICT = {
+        "#string_protein_id": "string_protein_id",
+        "protein_external_id": "protein_id",
+        "preferred_name": "gene_name",
+        '#ncbi_taxid': 'species_id',
+        'string_protein_id_2': 'homologous_protein_id',
+    }
+
+    def __init__(self, path="https://stringdb-static.org/download/", file_resources=None,
+                 species_id: Union[str, List[str]] = "9606", version="v11.0",
+                 source_col_name="protein1", target_col_name="protein2",
+                 edge_attr: Union[str, List[str]] = 'combined_score', directed=False,
+                 relabel_nodes=None,
+                 index_col='#string_protein_id',
+                 keys=None,
+                 alias_types={'Ensembl_UniProt', 'Ensembl_UniProt_AC'},
+                 blocksize=None, **kwargs):
+        """
+
+        Args:
+            path ():
+            file_resources ():
+            species_id (str, list):
+                Provide a species_id string or a list of species_id's to download the species-specific STRING
+                datasets and integrate them. If species_id is None, then download the full-dataset version of
+                STRING, which is very time-consuming.
+            version ():
+            source_col_name ():
+            target_col_name ():
+            edge_attr ():
+            directed ():
+            relabel_nodes ():
+            index_col ():
+            keys ():
+            alias_types ():
+            blocksize ():
+        """
+        self.version = version
+        self.species_id = copy.copy(species_id)
+        self.alias_types = alias_types
+        # load_network() builds a sparse adjacency from a single edge weight column
+        assert isinstance(edge_attr, str)
+
+        if file_resources is None:
+            file_resources = {}
+            if isinstance(species_id, (Iterable, str)) and len(species_id):
+                species_list = [species_id] if isinstance(species_id, str) else species_id
+                for species in species_list:
+                    file_resources[f"{species}.protein.info.txt.gz"] = \
+                        os.path.join(path, f"protein.info.{version}/{species}.protein.info.{version}.txt.gz")
+                    file_resources[f"{species}.protein.links.txt.gz"] = \
+                        os.path.join(path, f"protein.links.{version}/{species}.protein.links.{version}.txt.gz")
+                    file_resources[f"{species}.protein.links.detailed.txt.gz"] = \
+                        os.path.join(path, f"protein.links.detailed.{version}/"
+                                           f"{species}.protein.links.detailed.{version}.txt.gz")
+                    file_resources[f"{species}.protein.homology.txt.gz"] = \
+                        os.path.join(path, f"protein.homology.{version}/{species}.protein.homology.{version}.txt.gz")
+                    file_resources[f"{species}.clusters.proteins.txt.gz"] = \
+                        os.path.join(path, f"clusters.proteins.{version}/{species}.clusters.proteins.{version}.txt.gz")
+                    file_resources[f"{species}.protein.aliases.txt.gz"] = \
+                        os.path.join(path, f"protein.aliases.{version}/{species}.protein.aliases.{version}.txt.gz")
+                    file_resources[f"{species}.enrichment.terms.txt.gz"] = \
+                        os.path.join(path, f"enrichment.terms.{version}/{species}.enrichment.terms.{version}.txt.gz")
+                    file_resources[f"{species}.protein.sequences.fa.gz"] = \
+                        os.path.join(path, f"protein.sequences.{version}/{species}.protein.sequences.{version}.fa.gz")
+            else:
+                file_resources["protein.info.txt.gz"] = os.path.join(path,
f"protein.info.{version}.txt.gz") + file_resources["protein.links.txt.gz"] = os.path.join(path, f"protein.links.{version}.txt.gz") + file_resources["protein.sequences.fa.gz"] = os.path.join(path, f"protein.sequences.{version}.fa.gz") + else: + if isinstance(self.species_id, Iterable): + file_resources = {fn: fp for fn, fp in file_resources.items() \ + if any(fn.startswith(species) for species in self.species_id)} + + super().__init__(path=path, file_resources=file_resources, source_col_name=source_col_name, + target_col_name=target_col_name, edge_attr=edge_attr, directed=directed, + relabel_nodes=relabel_nodes, blocksize=blocksize, index_col=index_col, keys=keys, + col_rename=STRING.COLUMNS_RENAME_DICT, **kwargs) + + def load_dataframe(self, file_resources: Dict[str, str], blocksize: int = None) -> pd.DataFrame: + # Load nodes + dfs = [] + if blocksize: + for filename in [fn for fn, path in file_resources.items() \ + if 'info.txt' in fn and isinstance(path, str)]: + compression = 'gzip' if filename.endswith(".gz") else None + info_df = dd.read_table(file_resources[filename], na_values=['annotation not available'], + low_memory=True, compression=compression, + dtype={'protein_size': 'int8'}, + blocksize=None if isinstance(blocksize, bool) else blocksize) + + if self.keys is not None: + info_df = info_df.loc[info_df[self.index_col].isin(self.keys)] + + if self.index_col: + info_df = info_df.set_index(self.index_col, sorted=True) + + # Join other attributes to node_info + species_id = filename.split(".")[0] + attrs = self.load_accessory_data(file_resources, species_id=species_id, + alias_types=self.alias_types, blocksize=False) + if attrs is not None: + new_cols = attrs.columns.difference(info_df.columns) + info_df = info_df.join(attrs[new_cols], on=self.index_col) + + dfs.append(info_df) + else: + for filename in file_resources: + if filename.endswith("protein.info.txt"): + info_df = pd.read_table(file_resources[filename], na_values=['annotation not available'], + dtype={'protein_size': 'int8'}, + index_col=self.index_col, low_memory=True) + index_split = info_df['#string_protein_id'].str.split(".", expand=True, n=1) + info_df = info_df.assign(species_id=index_split[0], protein_embl_id=index_split[1]) + + # Join other attributes to node_info + species_id = filename.split(".")[0] + attrs = self.load_accessory_data(file_resources, species_id=species_id, + alias_types=self.alias_types, + blocksize=blocksize) + if attrs is not None: + new_cols = attrs.columns.difference(info_df.columns) + info_df = info_df.join(attrs[new_cols], on=self.index_col) + dfs.append(info_df) + + if not len(dfs): + raise Exception("Must provide at least one 'protein.info.txt' file.") + + if blocksize: + protein_info: dd.DataFrame = dd.concat(dfs, axis=0, interleave_partitions=True) + else: + protein_info = pd.concat(dfs, axis=0) + + return protein_info + + def load_accessory_data(self, file_resources: Dict[str, str], species_id: str, + accessory_files=['protein.aliases', 'protein.homology', 'protein.enrichment', + 'clusters.proteins'], + alias_types={'Ensembl_UniProt', 'Ensembl_UniProt_AC'}, blocksize=False, ) \ + -> Union[pd.DataFrame, dd.DataFrame]: + """ + Stack the annotations files for the provided `species_id`, such that rows in the annotations are filtered by + `keys` (if not null), indexed by "#string_protein_id", and with attributes transformed to a dataframe columns. 
+
+        Args:
+            file_resources (): a dict of filename and filepath
+            species_id (str): the species_id string, used to select only files that have the same prefix.
+            accessory_files (List[str]):
+                A list of strings that specify which types of annotation files to integrate, i.e., only select
+                files having a substring matching one of these.
+                Default ['protein.aliases', 'protein.homology', 'protein.enrichment', 'clusters.proteins'].
+            alias_types (): a set of strings, default {'Ensembl_UniProt', 'Ensembl_UniProt_AC'}
+                A set of `source` values in the `protein.aliases` annotation to aggregate `alias`es for.
+                Must be a subset of {'Ensembl_Source', 'Ensembl_gene', 'Ensembl_transcript', 'Ensembl_UniGene',
+                'Ensembl_RefSeq_short', 'Ensembl_RefSeq', 'Ensembl_OTTG', 'Ensembl_OTTP', 'Ensembl_UCSC',
+                'Ensembl_UniProt', 'Ensembl_UniProt_AC', 'Ensembl_EntrezGene', 'Ensembl_EMBL', 'Ensembl_protein_id'}
+            blocksize (bool): Pandas (False) is recommended here, to avoid unnecessary Dask overhead.
+
+        Returns:
+            dd.DataFrame or pd.DataFrame
+
+        """
+        allowed_prefixes = {'protein.aliases', 'protein.homology', 'protein.enrichment', 'clusters.proteins'}
+        if not set(accessory_files).issubset(allowed_prefixes):
+            logger.warn(f'{set(accessory_files).difference(allowed_prefixes)} files are not supported')
+
+        select_files = []
+        for fn, path in file_resources.items():
+            if fn.startswith(species_id) and any(ftype in fn for ftype in accessory_files):
+                select_files.append(fn)
+
+        dfs = []
+        for filename in select_files:
+            args = dict(
+                low_memory=True,
+                dtype={'cluster_id': 'category', '#ncbi_taxid': 'category', 'category': 'category',
+                       'source': 'category'})
+            compression = 'gzip' if filename.endswith(".gz") else None
+            if blocksize:
+                if not isinstance(file_resources[filename], str): continue
+                df = dd.read_table(file_resources[filename], compression=compression, **args)
+            else:
+                df = pd.read_table(file_resources[filename], **args)
+
+            # Set index for df
+            for col in ['#string_protein_id', 'protein_id', '#string_protein_1']:
+                if col in df.columns:
+                    df = df.set_index(col, sorted=True) if blocksize else df.set_index(col)
+                    break
+
+            # Skip files with no recognized index column, and align the index name with self.index_col
+            if df.index.name is None:
+                continue
+            elif self.index_col and df.index.name != self.index_col:
+                df.index = df.index.rename(self.index_col)
+                if blocksize:
+                    assert df.known_divisions
+
+            # Filter rows
+            if self.keys is not None:
+                df = df.loc[df.index.isin(self.keys)]
+
+            # Groupby on index and perform the appropriate transform depending on the annotation type
+            if 'protein.homology' in filename:
+                df = df.loc[df.index != df['string_protein_id_2']]
+                df = df.groupby(self.index_col)['string_protein_id_2'].unique().to_frame()
+                # TODO: ignores the column giving the size of homologous regions
+
+            elif 'clusters.protein' in filename:
+                df = df.groupby(self.index_col)[['cluster_id', '#ncbi_taxid']].unique()
+
+            elif 'protein.enrichment' in filename:
+                df = df.groupby(self.index_col)['term'].unique().to_frame()
+
+            elif 'protein.aliases' in filename:
+                df = df.loc[df['source'].isin(alias_types)]
+                df['source'] = df['source'].cat.set_categories(alias_types)
+                if blocksize:
+                    # Set alias values to lists so pivot_table(..., aggfunc='sum') will concatenate them
+                    df = df.assign(alias=df['alias'].map(lambda x: [x], meta=pd.Series([[""]])))
+                    df = dd.pivot_table(df.reset_index(),
+                                        index='#string_protein_id', columns='source', values='alias',
+                                        aggfunc='sum')
+                else:
+                    df = df.reset_index().groupby([self.index_col, 'source'])['alias'].unique().unstack(level=1)
+
+            if blocksize and not df.known_divisions:
+                df.divisions = df.compute_current_divisions()
+
+            if not len(df.index):
+                continue
+
+            dfs.append(df)
+
+        if dfs:
+            attrs = dd.concat(dfs, axis=1) if blocksize else pd.concat(dfs, axis=1)
+        else:
+            attrs = None
+
+        return attrs
+
+    def load_network(self, file_resources, source_col_name='protein1', target_col_name='protein2',
+                     edge_attr: Union[str, List[str]] = 'combined_score', directed=False, filters=None,
+                     blocksize=None):
+        keys = self.data.index.compute() if isinstance(self.data, dd.DataFrame) else self.data.index
+        select_files = [fn for fn, path in file_resources.items() if "links" in fn]
+
+        # Load edges. STRING channel scores and combined_score range from 0 to 1000, so uint16 is required.
+        edges_dfs = []
+        for filename in select_files:
+            args = dict(sep=" ", low_memory=True,
+                        dtype={'protein1': 'category', 'protein2': 'category',
+                               'neighborhood': 'uint16', 'fusion': 'uint16', 'cooccurence': 'uint16',
+                               'coexpression': 'uint16', 'experimental': 'uint16', 'database': 'uint16',
+                               'textmining': 'uint16', 'combined_score': 'uint16'})
+            if blocksize:
+                if not isinstance(file_resources[filename], str): continue
+                compression = 'gzip' if filename.endswith(".gz") else None
+                df: dd.DataFrame = dd.read_table(file_resources[filename], compression=compression, **args,
+                                                 blocksize=None if isinstance(blocksize, bool) else blocksize)
+
+                if compression:
+                    logger.info(f"Repartitioning {filename} from {df.npartitions} "
+                                f"partitions to {blocksize}-size partitions")
+                    df = df.repartition(partition_size=blocksize)
+
+            else:
+                df = pd.read_table(file_resources[filename], **args)
+
+            df = df.loc[df[source_col_name].isin(keys) & df[target_col_name].isin(keys)]
+            edges_dfs.append(df)
+
+        if len(edges_dfs) == 0:
+            return
+
+        # Concatenate multiple edgelists into one dataframe
+        edges_df = dd.concat(edges_dfs, axis=0) if blocksize else pd.concat(edges_dfs, axis=0)
+        edges_df = edges_df.rename(columns=self.COLUMNS_RENAME_DICT)
+        logger.info(f"{self.name()}-{self.species_id}: {edges_df.columns.tolist()}, {edges_df.shape}")
+
+        # Convert edge_attr (edge weights) from a 3-digit integer to a float in [0, 1]
+        assignfunc = {}
+        for col in (edge_attr if isinstance(edge_attr, list) else [edge_attr]):
+            if col in edges_df.columns and is_numeric_dtype(edges_df[col]):
+                assignfunc[col] = edges_df[col].astype('float16') / 1000
+        if assignfunc:
+            edges_df = edges_df.assign(**assignfunc)
+
+        edges_df = filter_rows(edges_df, filters=filters)
+
+        self.edges = edges_df
+        # Set ordering for rows and columns
+        node2idx = {node: i for i, node in enumerate(keys)}
+
+        if isinstance(edges_df, dd.DataFrame):
+            def edgelist2adj(df: pd.DataFrame) -> ssp.coo_matrix:
+                # Skip Dask's metadata-inference dummy partition
+                if df.shape[0] == 1 and df.iloc[0, 0] == 'foo':
+                    return None
+
+                df = df.assign(row=df[source_col_name].map(node2idx).astype('int'),
+                               col=df[target_col_name].map(node2idx).astype('int'))
+                df = df.dropna(subset=['row', 'col'])
+
+                if df.shape[0] == 0:
+                    return None
+
+                coo_adj = ssp.coo_matrix((df[edge_attr], (df['row'], df['col'])),
+                                         shape=(len(keys), len(keys)))
+                coo_adj.eliminate_zeros()
+                return coo_adj
+
+            # Create a sparse adjacency matrix for each partition, then add them together to combine
+            adj = edges_df.reduction(chunk=edgelist2adj,
+                                     aggregate=lambda x: x.dropna().sum() if not x.isna().all() else None,
+                                     meta=pd.Series([ssp.coo_matrix])).compute()
+            assert len(adj) == 1, f"len(adj) = {len(adj)}"
+
+            G = nx.from_scipy_sparse_matrix(adj[0], create_using=nx.DiGraph() if directed else nx.Graph(),
+                                            edge_attribute='weight')
+            idx2node = {i: node for i, node in enumerate(keys)}
+            G = nx.relabel_nodes(G, mapping=idx2node, copy=True)
+            del adj
+
+        else:
+            # Determine which edge attr to add
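+            # nx.from_pandas_edgelist interprets edge_attr as a list of column names, a single column
+            # name, True (use all remaining columns), or None (no edge attributes); map `edge_attr`
+            # onto those cases below.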
+            if isinstance(edge_attr, (list, tuple)):
+                cols = edges_df.columns.intersection(edge_attr + [source_col_name, target_col_name])
+                edges_df = edges_df[cols]
+                use_attrs = True
+            elif isinstance(edge_attr, str):
+                cols = edges_df.columns.intersection([source_col_name, target_col_name, edge_attr])
+                edges_df = edges_df[cols]
+                use_attrs = edge_attr
+            else:
+                use_attrs = None
+            G = nx.from_pandas_edgelist(edges_df, source=source_col_name, target=target_col_name,
+                                        edge_attr=use_attrs, create_using=nx.DiGraph() if directed else nx.Graph())
+
+        return G
+
+    def get_sequences(self, index="protein_id", omic=None, agg=None):
+        if hasattr(self, "seq_dict"):
+            return self.seq_dict
+
+        self.seq_dict = {}
+        collisions = 0
+        for record in SeqIO.parse(self.file_resources["protein.sequences.fa"], "fasta"):
+            gene_id = str(record.name)
+
+            sequence_str = str(record.seq)
+            if index == "protein_name":
+                key = self.protein_id2name[gene_id]
+            elif index == "protein_id":
+                key = gene_id
+            else:
+                raise ValueError("`index` must be either 'protein_id' or 'protein_name'")
+
+            if key in self.seq_dict:
+                collisions += 1
+
+            self.seq_dict[key] = sequence_str
+
+        logger.warn("Seq {} collisions: {}".format(index, collisions))
+        return self.seq_dict
+
+
+class GeneMania(Interactions):
+    """Loads the GeneMania database from https://genemania.org/ .
+
+    Default path: local_directory .
+    Default file_resources: {
+        "COMBINED.DEFAULT_NETWORKS.BP_COMBINING.txt": "COMBINED.DEFAULT_NETWORKS.BP_COMBINING.txt",
+        "identifier_mappings.txt": "identifier_mappings.txt",
+    }
+    """
+
+    def __init__(self, path, file_resources=None, source_col_name="Gene_A", target_col_name="Gene_B",
+                 edge_attr=None, filters=None, directed=True, relabel_nodes=None, **kwargs):
+        if edge_attr is None:
+            edge_attr = ["Weight"]
+        if file_resources is None:
+            file_resources = {}
+            file_resources["COMBINED.DEFAULT_NETWORKS.BP_COMBINING.txt"] = os.path.join(
+                path, "COMBINED.DEFAULT_NETWORKS.BP_COMBINING.txt")
+            file_resources["identifier_mappings.txt"] = os.path.join(path, "identifier_mappings.txt")
+
+        super().__init__(path=path, file_resources=file_resources, source_col_name=source_col_name,
+                         target_col_name=target_col_name, edge_attr=edge_attr, filters=filters, directed=directed,
+                         relabel_nodes=relabel_nodes, **kwargs)
+
+    def load_network(self, file_resources, source_col_name, target_col_name, edge_attr, directed, filters,
+                     blocksize=None):
+        interactions = pd.read_table(file_resources["COMBINED.DEFAULT_NETWORKS.BP_COMBINING.txt"], low_memory=True)
+        identifier = pd.read_table(file_resources["identifier_mappings.txt"])
+
+        # Rename ENSG IDs to gene names
+        identifier = identifier[identifier["Source"] == "Gene Name"]
+        id_mapping = pd.Series(identifier["Name"].values, index=identifier["Preferred_Name"]).to_dict()
+        interactions.replace(id_mapping, inplace=True)
+
+        genemania_RNA_RNA_network = nx.from_pandas_edgelist(interactions, source=source_col_name,
+                                                            target=target_col_name,
+                                                            edge_attr=edge_attr,
+                                                            create_using=nx.DiGraph())
+        return genemania_RNA_RNA_network
+
+
+class IntAct(Interactions):
+    """Interface for the IntAct molecular interaction database from https://www.ebi.ac.uk/intact/ ."""
+
+    def __init__(self, path, file_resources: Dict, source_col_name: str = None, target_col_name: str = None,
+                 source_index: str = None, target_index: str = None, edge_attr: List[str] = None,
+                 filters: dict = None, directed: bool = True, relabel_nodes: dict = None, blocksize=None, **kwargs):
+        super().__init__(path, file_resources, source_col_name, target_col_name, edge_attr, filters, directed,
+                         relabel_nodes, blocksize, **kwargs)
+
+
+class BioGRID(Interactions):
+    """Loads the BioGRID database from https://thebiogrid.org .
+
+    Default path: "https://downloads.thebiogrid.org/Download/BioGRID/Latest-Release/" .
+    Default file_resources: {
+        "BIOGRID-ALL-LATEST.tab2.zip": "BIOGRID-ALL-LATEST.tab2.zip",
+    }
+    """
+
+    def __init__(self, path="https://downloads.thebiogrid.org/Download/BioGRID/Latest-Release/",
+                 file_resources=None, source_col_name="Official Symbol Interactor A",
+                 target_col_name="Official Symbol Interactor B",
+                 edge_attr=['Score', 'Throughput', 'Experimental System', 'Experimental System Type'],
+                 filters=None, directed=False, relabel_nodes=None, **kwargs):
+        """
+
+        Args:
+            path ():
+            file_resources ():
+            source_col_name ():
+            target_col_name ():
+            edge_attr ():
+            filters (): Default None, example {"Organism Interactor A": 9606}.
+            directed ():
+            relabel_nodes ():
+            **kwargs ():
+        """
+        if file_resources is None:
+            file_resources = {}
+            file_resources["BIOGRID-ALL-LATEST.tab2.zip"] = os.path.join(path, "BIOGRID-ALL-LATEST.tab2.zip")
+
+        super().__init__(path=path, file_resources=file_resources, source_col_name=source_col_name,
+                         target_col_name=target_col_name, edge_attr=edge_attr, filters=filters, directed=directed,
+                         relabel_nodes=relabel_nodes, **kwargs)
+
+    def load_dataframe(self, file_resources: Dict[str, str], blocksize: int = None) -> pd.DataFrame:
+        args = dict(na_values=["-"], header=0, low_memory=True,
+                    # usecols=['Official Symbol Interactor A', 'Official Symbol Interactor B',
+                    #          'Organism Interactor A', 'Score', 'Throughput', 'Qualifications',
+                    #          'Modification', 'Phenotypes', 'Source Database'],
+                    dtype={'Score': 'float', 'Entrez Gene Interactor A': 'category',
+                           'Entrez Gene Interactor B': 'category',
+                           'BioGRID ID Interactor A': 'category', 'BioGRID ID Interactor B': 'category',
+                           'Systematic Name Interactor A': 'category', 'Systematic Name Interactor B': 'category',
+                           'Official Symbol Interactor A': 'category', 'Official Symbol Interactor B': 'category',
+                           'Pubmed ID': 'str', 'Throughput': 'category', 'Experimental System Type': 'category',
+                           'Experimental System': 'category', 'Modification': 'category',
+                           'Source Database': 'category',
+                           'Organism Interactor A': 'category', 'Organism Interactor B': 'category'})
+
+        if blocksize:
+            edges = dd.read_table(file_resources["BIOGRID-ALL-LATEST.tab2"], blocksize=blocksize, **args)
+        else:
+            edges = pd.read_table(file_resources["BIOGRID-ALL-LATEST.tab2"], **args)
+
+        self.edges = edges
+
+        return edges
+
+    def load_network(self, file_resources, source_col_name, target_col_name, edge_attr, directed, filters,
+                     blocksize=None):
+        df = self.edges
+        df = filter_rows(df, filters)
+        network = nx.from_pandas_edgelist(df, source=source_col_name, target=target_col_name,
+                                          edge_attr=edge_attr,
+                                          create_using=nx.DiGraph() if directed else nx.Graph())
+        return network
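+
+# Illustrative usage sketch for the BioGRID loader above (assumes the Latest-Release URL is reachable);
+# the organism filter mirrors the example in the __init__ docstring, passed as a string to match the
+# 'category' dtype used in load_dataframe().
+#
+#     biogrid = BioGRID(filters={"Organism Interactor A": "9606"})
+#     ppi_edges = biogrid.get_interactions(data=True)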
+
+
+class MiRTarBase(Interactions):
+    """Loads the miRTarBase database from http://mirtarbase.mbc.nctu.edu.tw/ .
+
+    Default path: "http://mirtarbase.mbc.nctu.edu.tw/cache/download/7.0/" .
+    Default file_resources: {
+        "miRTarBase_MTI.xlsx": "miRTarBase_MTI.xlsx",
+    }
+    """
+
+    def __init__(self, path="http://mirtarbase.mbc.nctu.edu.tw/cache/download/7.0/", file_resources=None,
+                 source_col_name="miRNA", target_col_name="Target Gene",
+                 edge_attr=None,
+                 filters=None,
+                 directed=True,
+                 relabel_nodes=None,
+                 strip_mirna_name=False, **kwargs):
+        """
+
+        Args:
+            path ():
+            file_resources ():
+            source_col_name ():
+            target_col_name ():
+            edge_attr ():
+            filters (): default None, example {"Species (Target Gene)": "Homo sapiens"}
+            directed ():
+            relabel_nodes ():
+            strip_mirna_name ():
+            **kwargs ():
+        """
+        if edge_attr is None:
+            edge_attr = ["Support Type"]
+        self.strip_mirna_name = strip_mirna_name
+
+        if file_resources is None:
+            file_resources = {}
+            file_resources["miRTarBase_MTI.xlsx"] = os.path.join(path, "miRTarBase_MTI.xlsx")
+
+        super().__init__(path=path, file_resources=file_resources, source_col_name=source_col_name,
+                         target_col_name=target_col_name, edge_attr=edge_attr, filters=filters, directed=directed,
+                         relabel_nodes=relabel_nodes, **kwargs)
+
+    def load_dataframe(self, file_resources: Dict[str, str], blocksize: int = None) -> pd.DataFrame:
+        df = pd.read_excel(file_resources["miRTarBase_MTI.xlsx"])
+        self.edges = df
+        return df
+
+    def load_network(self, file_resources, source_col_name, target_col_name, edge_attr, directed, filters,
+                     blocksize=None):
+        df = self.data
+        df = filter_rows(df, filters)
+
+        # Drop the trailing '*' that marks the minor strand of a miRNA duplex
+        df['miRNA'] = df['miRNA'].str.rstrip('*')
+
+        if self.strip_mirna_name:
+            df['miRNA'] = df['miRNA'].str.lower().str.replace("-3p.*|-5p.*", "", regex=True)
+
+        mir_target_network = nx.from_pandas_edgelist(df, source=source_col_name, target=target_col_name,
+                                                     edge_attr=edge_attr,
+                                                     create_using=nx.DiGraph() if directed else nx.Graph())
+        return mir_target_network
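+
+# Illustrative usage sketch for the miRTarBase loader above (assumes the release 7.0 download URL is
+# still live); the species filter mirrors the example in the __init__ docstring.
+#
+#     mirtarbase = MiRTarBase(filters={"Species (Target Gene)": "Homo sapiens"}, strip_mirna_name=True)
+#     mir_target_edges = mirtarbase.get_interactions(data=True)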
+
+
+class LncBase(Interactions, Database):
+    """Loads the LncBase database from http://carolina.imis.athena-innovation.gr/diana_tools/web/index.php?r=lncbasev2%2Findex .
+
+    Default path: "https://dianalab.e-ce.uth.gr/downloads/" .
+    Default file_resources: {
+        "LncBasev2_download.csv": "lncbase_v2_exp_data.tar.gz",
+    }
+    """
+
+    def __init__(self, path='https://dianalab.e-ce.uth.gr/downloads/', file_resources=None, strip_mirna_name=False,
+                 source_col_name="mirna", target_col_name="geneId",
+                 edge_attr=None,
+                 filters=None,
+                 directed=True,
+                 relabel_nodes=None):
+        """
+
+        Args:
+            path ():
+            file_resources ():
+            strip_mirna_name ():
+            source_col_name ():
+            target_col_name ():
+            edge_attr ():
+            filters (): default None. Example: {"species": "Homo sapiens"}
+            directed ():
+            relabel_nodes ():
+        """
+        self.strip_mirna_name = strip_mirna_name
+
+        if edge_attr is None:
+            edge_attr = ["tissue", "positive_negative"]
+        if file_resources is None:
+            file_resources = {}
+            file_resources["LncBasev2_download.csv"] = os.path.join(path, "lncbase_v2_exp_data.tar.gz")
+
+        super().__init__(path=path, file_resources=file_resources, source_col_name=source_col_name,
+                         target_col_name=target_col_name, edge_attr=edge_attr, filters=filters, directed=directed,
+                         relabel_nodes=relabel_nodes)
+
+    def get_rename_dict(self, from_index="geneId", to_index="geneName"):
+        lncbase_df = pd.read_table(self.file_resources["LncBasev2_download.csv"], low_memory=True)
+        gene_id_to_gene_name_dict = pd.Series(lncbase_df["geneName"].values,
+                                              index=lncbase_df["geneId"]).to_dict()
+        return gene_id_to_gene_name_dict
+
+    def load_dataframe(self, file_resources: Dict[str, str], blocksize: int = None) -> pd.DataFrame:
+        df = pd.read_table(file_resources["LncBasev2_download.csv"], low_memory=True)
+        df.replace({"species": {"Homo Sapiens": "Homo sapiens", "Mus Musculus": "Mus musculus"}}, inplace=True)
+        return df
+
+    def load_network(self, file_resources, source_col_name, target_col_name, edge_attr, directed, filters,
+                     blocksize=None):
+        df = self.data
+        df = filter_rows(df, filters)
+
+        if self.strip_mirna_name:
+            df['mirna'] = df['mirna'].str.lower()
+            df['mirna'] = df['mirna'].str.replace("-3p.*|-5p.*", "", regex=True)
+
+        if edge_attr is None:
+            edge_attr = ["tissue", "positive_negative"]
+        lncBase_lncRNA_miRNA_network = nx.from_pandas_edgelist(df, source=source_col_name, target=target_col_name,
+                                                               edge_attr=edge_attr,
+                                                               create_using=nx.DiGraph() if directed else nx.Graph())
+        return lncBase_lncRNA_miRNA_network
+
+
+class TarBase(Interactions):
+    """Loads the TarBase database from https://dianalab.e-ce.uth.gr/downloads .
+
+    Default file_resources: {
+        "tarbase_v8_data.tar.gz": "https://dianalab.e-ce.uth.gr/downloads/tarbase_v8_data.tar.gz",
+        "speclist": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/docs/speclist",
+    }
+    """
+
+    def __init__(self, path='https://dianalab.e-ce.uth.gr/downloads', file_resources: Dict = None,
+                 source_col_name: str = 'mirna', target_col_name: str = 'geneName',
+                 edge_attr: List[str] = None, filters: Union[str, Dict[str, Union[str, List[str]]]] = None,
+                 directed: bool = True, relabel_nodes: dict = None, blocksize=None, **kwargs):
+        """
+
+        Args:
+            path ():
+            file_resources ():
+            source_col_name ():
+            target_col_name ():
+            edge_attr ():
+            filters ():
+            directed ():
+            relabel_nodes ():
+            blocksize ():
+            **kwargs ():
+        """
+        if file_resources is None:
+            file_resources = {
+                'tarbase_v8_data.tar.gz': 'https://dianalab.e-ce.uth.gr/downloads/tarbase_v8_data.tar.gz',
+                'speclist': 'https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/docs/speclist',
+            }
+
+        super().__init__(path, file_resources, source_col_name, target_col_name, edge_attr, filters, directed,
+                         relabel_nodes, blocksize, **kwargs)
+
+    def load_dataframe(self, file_resources: Dict[str, str], blocksize: int = None) -> pd.DataFrame:
+        edges = pd.read_table(file_resources['tarbase_v8_data.tar.gz'], compression='tar',
+                              dtype={'tissue': 'category', 'method': 'category', 'positive_negative': 'category',
+                                     'species': 'category',
+                                     'direct_indirect': 'category', 'up_down': 'category',
+                                     'cell_line': 'category'})
+
+        if 'speclist' in file_resources:
+            # Map species names (scientific, common, or synonym) to NCBI taxon IDs via the UniProt speclist
+            species_df = UniProt.get_species_list(file_resources['speclist'])
+            species_df = species_df[['Official (scientific) name', 'Common name', 'Synonym']].melt(ignore_index=False)
+            species_df = species_df.dropna().reset_index()
+            species_name2id = species_df.set_index('value')['NCBI-taxon'].to_dict()
+            edges['species_id'] = edges['species'].map(species_name2id)
+
+        self.edges = edges
+        return edges
+
+    def load_network(self, file_resources: Dict, source_col_name: str, target_col_name: str, edge_attr: List[str],
+                     directed: bool, filters: Dict[str, Any], blocksize=None):
+        df = self.data
+        df = filter_rows(df, filters)
+
+        # Remove a trailing parenthesized 3-letter species code, e.g. "GENE(hsa)"
+        df['geneName'] = df['geneName'].str.replace(r'\(\w{3}\)$', '', regex=True)
+        idx = df['geneName'].str.contains(r'\(', regex=True)
+        df.loc[idx, 'geneName'] = df.loc[idx, 'geneName'].str.replace(r'\(\d of \d\)', '', regex=True).str.strip()
+
+        idx = df['geneName'].str.contains(r'\(\w*\)', regex=True)
+        df.loc[idx, 'geneName'] = df.loc[idx, 'geneName'].str.extract(r'\((\w*)\)(\w*)')[0]
+
+        idx = df['geneName'].str.contains(r'\(', regex=True)
+        df.loc[idx, 'geneName'] = df.loc[idx, 'geneName'].str.split('(', expand=True)[0]
+
+        g = nx.from_pandas_edgelist(df, source=source_col_name, target=target_col_name,
+                                    edge_attr=edge_attr,
+                                    create_using=nx.DiGraph() if directed else nx.Graph())
+        return g
+
+
+class RNAInter(Interactions):
+    """Loads the RNAInter database from http://www.rnainter.org/ .
+
+    Default path: "http://www.rnainter.org/raidMedia/download/" .
+    Default file_resources: {
+        "Download_data_RR.tar.gz": "Download_data_RR.tar.gz",
+        "Download_data_RP.tar.gz": "Download_data_RP.tar.gz",
+    }
+    """
+
+    def __init__(self, path='http://www.rnainter.org/raidMedia/download/', file_resources: Dict = None,
+                 source_col_name: str = 'Interactor1.Symbol', target_col_name: str = 'Interactor2.Symbol',
+                 edge_attr: Union[str, List[str]] = 'score',
+                 filters: Union[str, Dict[str, Union[str, List[str]]]] = None,
+                 directed: bool = True, relabel_nodes: dict = None, blocksize=None, **kwargs):
+        """
+
+        Args:
+            path ():
+            file_resources ():
+            source_col_name ():
+            target_col_name ():
+            edge_attr ():
+            filters ():
+            directed ():
+            relabel_nodes ():
+            blocksize ():
+            **kwargs ():
+        """
+        if file_resources is None:
+            file_resources = {
+                'Download_data_RR.tar.gz': 'Download_data_RR.tar.gz',
+                'Download_data_RP.tar.gz': 'Download_data_RP.tar.gz',
+            }
+
+        super().__init__(path, file_resources, source_col_name, target_col_name, edge_attr, filters, directed,
+                         relabel_nodes, blocksize, **kwargs)
+
+    def load_dataframe(self, file_resources: Dict, blocksize: int = None) -> pd.DataFrame:
+        args = dict(dtype={'Category1': 'category', 'Category2': 'category',
+                           'Species1': 'category', 'Species2': 'category', 'score': 'float',
+                           'predict': 'category', 'weak': 'category', 'strong': 'category'})
+        edge_files = (fn for fn in file_resources if fn.startswith('Download_data'))
+        dfs = []
+        for fn in edge_files:
+            if blocksize:
+                if not isinstance(file_resources[fn], str): continue
+                df = dd.read_table(file_resources[fn], compression='tar' if fn.endswith('.tar.gz') else None, **args)
+            else:
+                df = pd.read_table(file_resources[fn], compression='tar' if fn.endswith('.tar.gz') else None, **args)
+
+            dfs.append(filter_rows(df, self.filters))
+
+        # Concatenate the edge lists from all matched files, rather than keeping only the last file read
+        edges = dd.concat(dfs, axis=0) if blocksize else pd.concat(dfs, axis=0)
+
+        self.edges = edges
+        return edges
+
+    def load_network(self, file_resources, source_col_name='Interactor1.Symbol', target_col_name='Interactor2.Symbol',
+                     edge_attr='score', directed=True, filters=None, blocksize=None):
+        edges = self.data
+        if filters != self.filters:
+            edges = filter_rows(edges, filters)
+
+        g = nx.from_pandas_edgelist(edges, source=source_col_name, target=target_col_name,
+                                    edge_attr=edge_attr,
+                                    create_using=nx.DiGraph() if directed else nx.Graph())
+        return g
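+
+# Illustrative usage sketch for the RNAInter loader above (assumes the RR/RP archives at the default
+# path are reachable); 'score' becomes the only attribute on each edge.
+#
+#     rnainter = RNAInter(filters={'Species1': 'Homo sapiens'})
+#     rri_edges = rnainter.get_interactions(data=True)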
+
+
+class TargetScan(Interactions, Database):
+    """Loads the TargetScan database from "http://www.targetscan.org/" .
+
+    Default path: "http://www.targetscan.org/vert_72/vert_72_data_download/" .
+    Default file_resources: {
+        "miR_Family_Info.txt.zip": "miR_Family_Info.txt.zip",
+        "Predicted_Targets_Info.default_predictions.txt": "Predicted_Targets_Info.default_predictions.txt",
+    }
+    """
+
+    def __init__(self, path="http://www.targetscan.org/vert_72/vert_72_data_download/", file_resources=None,
+                 source_col_name="MiRBase ID", target_col_name="Gene Symbol",
+                 edge_attr=None, directed=True, relabel_nodes=None, species_id=None,
+                 strip_mirna_name=False, **kwargs):
+        self.strip_mirna_name = strip_mirna_name
+        self.species_id = species_id
+        if file_resources is None:
+            file_resources = {}
+            file_resources["miR_Family_Info.txt.zip"] = os.path.join(path, "miR_Family_Info.txt.zip")
+            file_resources["Predicted_Targets_Info.default_predictions.txt"] = \
+                os.path.join(path, "Predicted_Targets_Info.default_predictions.txt")
+
+        super().__init__(path=path, file_resources=file_resources, source_col_name=source_col_name,
+                         target_col_name=target_col_name,
+                         directed=directed, relabel_nodes=relabel_nodes, edge_attr=edge_attr, **kwargs)
+
+    def load_network(self, file_resources, source_col_name, target_col_name, edge_attr, directed, filters,
+                     blocksize=None):
+        self.df = self.process_miR_family_info_table(file_resources, self.species_id)
+        interactions_df = self.process_interactions_table(file_resources, self.df, self.species_id)
+        print(self.name(), interactions_df.columns.tolist())
+
+        mir_target_network = nx.from_pandas_edgelist(interactions_df,
+                                                     source=source_col_name, target=target_col_name,
+                                                     edge_attr=edge_attr,
+                                                     create_using=nx.DiGraph() if directed else nx.Graph())
+        return mir_target_network
+
+    def process_miR_family_info_table(self, file_resources, species=None):
+        miR_Family_Info_df = pd.read_table(file_resources["miR_Family_Info.txt"], delimiter='\t')
+
+        if species:
+            miR_Family_Info_df = miR_Family_Info_df[miR_Family_Info_df['Species ID'] == species]
+
+        # Standardize MiRBase ID to miRNA names obtained from RNA-seq hg19
+        if self.strip_mirna_name:
+            miR_Family_Info_df['MiRBase ID'] = miR_Family_Info_df['MiRBase ID'].str.lower()
+            miR_Family_Info_df['MiRBase ID'] = miR_Family_Info_df['MiRBase ID'].str.replace("-3p.*|-5p.*", "",
+                                                                                            regex=True)
+
+        miR_Family_Info_df.drop_duplicates(inplace=True)
+        miR_Family_Info_df = miR_Family_Info_df.filter(items=['miR family', 'MiRBase ID', 'Seed+m8',
+                                                              'Mature sequence', 'Family Conservation?',
+                                                              'MiRBase Accession'],
+                                                       axis="columns")
+        miR_Family_Info_df['MiRBase ID'] = miR_Family_Info_df['MiRBase ID'].astype(str)
+        return miR_Family_Info_df
+
+    def process_interactions_table(self, file_resources, family_to_miR_df, species_id):
+        """Joins the interactions table between miR families and their target genes with the miR family info
+        table, so that each miR family is mapped to its MiRBase IDs.
+
+        Args:
+            file_resources:
+            family_to_miR_df:
+            species_id:
+
+        Returns:
+            mir_interactions_df
+        """
+        # Load data frame from file
+        family_interactions_df = pd.read_table(file_resources["Predicted_Targets_Info.default_predictions.txt"],
+                                               dtype={'Species ID': 'category'},
+                                               delimiter='\t', low_memory=True)
+
+        # Select only miRNA-target pairs of certain species_id
+        if species_id:
+            family_interactions_df = family_interactions_df[family_interactions_df["Species ID"] == species_id]
+
+        family_interactions_df = family_interactions_df.filter(items=["miR Family", "Gene Symbol"], axis="columns")
+        family_to_miR_df = family_to_miR_df.filter(items=['miR family', 'MiRBase ID'], axis="columns")
+        family_to_miR_df = family_to_miR_df.rename(columns={'miR family': 'miR Family'})
+
+        # map miRBase ID names to miR Family
+        # family_interactions_df = pd.merge(family_interactions_df, family_to_miR_df, how='outer', on="miR Family")
+        family_to_miR_df.set_index("miR Family", inplace=True)
+        family_interactions_df.set_index("miR Family", inplace=True)
+        mir_interactions_df = family_interactions_df.join(family_to_miR_df, how='outer').reset_index()
+
+        # Standardize MiRBase ID to miRNA names obtained from RNA-seq hg19
+        if self.strip_mirna_name:
+            mir_interactions_df['MiRBase ID'] = mir_interactions_df['MiRBase ID'].str.lower()
+            mir_interactions_df['MiRBase ID'] = mir_interactions_df['MiRBase ID'].str.replace("-3p.*|-5p.*", "",
+                                                                                              regex=True)
+
+        return mir_interactions_df
+
+
+class LncReg(Interactions):
+    """Loads the LncReg database from a local directory.
+
+    Default path: local_directory .
+    Default file_resources: {
+        "data.xlsx": "data.xlsx",
+    }
+    """
+
+    def __init__(self, path, file_resources,
+                 source_col_name='A_name_in_paper', target_col_name='B_name_in_paper',
+                 source_index="transcript_name", target_index="gene_name",
+                 edge_attr=["relationship", "mechanism", "pmid"], filters=None, directed=True, relabel_nodes=None,
+                 verbose=False):
+        if file_resources is None:
+            file_resources = {}
+            file_resources["data.xlsx"] = os.path.join(path, "data.xlsx")
+
+        super().__init__(path, file_resources=file_resources, source_col_name=source_col_name,
+                         target_col_name=target_col_name, edge_attr=edge_attr, filters=filters, directed=directed,
+                         relabel_nodes=relabel_nodes, verbose=verbose)
+
+    def load_network(self, file_resources, source_col_name, target_col_name, edge_attr, directed, filters,
+                     blocksize=None):
+        df = pd.read_excel(self.file_resources["data.xlsx"])
+        print(self.name(), df.columns.tolist())
+
+        df = df[df["species"] == "Homo sapiens"]
+        df.loc[df["B_category"] == "miRNA", "B_name_in_paper"] = df[df["B_category"] == "miRNA"][
+            "B_name_in_paper"].str.replace("-3p.*|-5p.*", "", regex=True)
+        df.loc[df["B_category"] == "miRNA", "B_name_in_paper"] = df[df["B_category"] == "miRNA"][
+            "B_name_in_paper"].str.replace("MIR", "hsa-mir-")
+        df.loc[df["B_category"] == "miRNA", "B_name_in_paper"] = df[df["B_category"] == "miRNA"][
+            "B_name_in_paper"].str.replace("let-", "hsa-let-")
+
+        LncReg_lncRNA_RNA_network = nx.from_pandas_edgelist(df, source=source_col_name, target=target_col_name,
+                                                            edge_attr=edge_attr,
+                                                            create_using=nx.DiGraph())
+        return LncReg_lncRNA_RNA_network
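+
+# Illustrative usage sketch for the LncReg loader above; the path is a hypothetical local folder that
+# must contain the downloaded data.xlsx.
+#
+#     lncreg = LncReg(path="/path/to/lncreg/", file_resources=None)
+#     lncrna_edges = lncreg.get_interactions(data=True)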
+
+
+class lncRInter(Interactions):
+    """Loads the lncRInter database from a local directory.
+
+    Default path: local_directory .
+    Default file_resources: {
+        "human_interactions.txt": "human_interactions.txt",
+    }
+    """
+
+    def __init__(self, path, file_resources=None, source_col_name="lncrna",
+                 target_col_name='Interacting partner',
+                 edge_attr=None, filters=None,
+                 directed=True, relabel_nodes=None, **kwargs):
+        if edge_attr is None:
+            edge_attr = ["Interaction Class", "Interaction Mode", "Tissue", "Phenotype"]
+        if file_resources is None:
+            file_resources = {}
+            file_resources["human_interactions.txt"] = os.path.join(path, "human_interactions.txt")
+
+        super().__init__(path, file_resources, source_col_name=source_col_name, target_col_name=target_col_name,
+                         edge_attr=edge_attr, filters=filters, directed=directed, relabel_nodes=relabel_nodes,
+                         **kwargs)
+
+    def load_network(self, file_resources, source_col_name, target_col_name, edge_attr, directed, filters,
+                     blocksize=None):
+        lncRInter_df = pd.read_table(file_resources["human_interactions.txt"])
+        print(self.name(), lncRInter_df.columns.tolist())
+
+        lncRInter_df = filter_rows(lncRInter_df, filters)
+        # Data cleaning
+        lncRInter_df.loc[lncRInter_df["Interacting partner"].str.contains("MIR"), "Interacting partner"] = \
+            lncRInter_df.loc[
+                lncRInter_df["Interacting partner"].str.contains("MIR"), "Interacting partner"].str.lower()
+        lncRInter_df["Interacting partner"] = lncRInter_df["Interacting partner"].str.replace("mirlet", "hsa-let-")
+        lncRInter_df["Interacting partner"] = lncRInter_df["Interacting partner"].str.replace("mir", "hsa-mir-")
+        # Reformat names like "hsa-mir-123a4" to "hsa-mir-123a-4"
+        mask = lncRInter_df["Interacting partner"].str.contains(r"(?:mir|let)-\d+[a-z]+\d+")
+        lncRInter_df.loc[mask, "Interacting partner"] = \
+            lncRInter_df.loc[mask, "Interacting partner"].apply(lambda x: x[:-1] + "-" + x[-1])
+
+        lncRInter_network = nx.from_pandas_edgelist(lncRInter_df, source=source_col_name,
+                                                    target=target_col_name,
+                                                    edge_attr=edge_attr,
+                                                    create_using=nx.DiGraph() if directed else nx.Graph())
+        return lncRInter_network
+
+
+class LncRNA2Target(Interactions):
+    """Loads the LncRNA2Target database from http://123.59.132.21/lncrna2target .
+
+    Default path: "http://123.59.132.21/lncrna2target/data/" .
+    Default file_resources: {
+        "lncRNA_target_from_high_throughput_experiments.txt.rar": "lncrna_target.rar",
+        "lncRNA_target_from_low_throughput_experiments.xlsx": "lncRNA_target_from_low_throughput_experiments.xlsx",
+    }
+    """
+
+    def __init__(self, path="http://123.59.132.21/lncrna2target/data/", file_resources=None, edge_attr=None,
+                 filters=None,
+                 directed=True, relabel_nodes=None, version="high_throughput", **kwargs):
+        """
+
+        Args:
+            filters (): default None, example {"species_id": 9606, "Species": "Homo sapiens"}.
+            version (str): one of ["high_throughput", "low_throughput"].
+                The high_throughput version of the LncRNA2Target database is v2.0 and low_throughput is v1.0,
+                according to the database's website.
+            species_id (str, int): one of [9606, "Homo sapiens"].
+                The species column in high_throughput is formatted as an int (e.g. 9606) and in low_throughput
+                as a str (e.g. "Homo sapiens").
"Homo sapiens") + """ + self.version = version + if file_resources is None: + file_resources = {} + file_resources["lncRNA_target_from_high_throughput_experiments.txt.rar"] = \ + os.path.join(path, "lncrna_target.rar") + file_resources["lncRNA_target_from_low_throughput_experiments.xlsx"] = \ + os.path.join(path, "lncRNA_target_from_low_throughput_experiments.xlsx") + + if self.version == "high_throughput": + super().__init__(path, file_resources, source_col_name="lncrna_symbol", target_col_name="gene_symbol", + edge_attr=edge_attr, filters=filters, directed=directed, relabel_nodes=relabel_nodes, + **kwargs) + if self.version == "low_throughput": + super().__init__(path, file_resources, source_col_name="GENCODE_gene_name", + target_col_name="Target_official_symbol", edge_attr=edge_attr, filters=filters, + directed=directed, relabel_nodes=relabel_nodes, **kwargs) + + def load_network(self, file_resources, source_col_name, target_col_name, edge_attr, directed, filters, + blocksize=None): + network = None + if self.version == "high_throughput": + network = self.load_network_high_throughput(file_resources, source_col_name, target_col_name, edge_attr, + directed) + elif self.version == "low_throughput": + network = self.load_network_low_throughput(file_resources, source_col_name, target_col_name, edge_attr, + directed) + else: + logger.warn("LncRNA2Target version argument must be one of 'high_throughput' or 'low_throughput'") + + return network + + def load_network_high_throughput(self, file_resources, source_col_name="lncrna_symbol", + target_col_name="gene_symbol", + edge_attr=None, directed=True, filters=None): + edges = pd.read_table(file_resources["lncRNA_target_from_high_throughput_experiments.txt"], sep="\t") + edges = filter_rows(edges, filters) + + edges["lncrna_symbol"] = edges["lncrna_symbol"].str.upper() + edges["lncrna_symbol"] = edges["lncrna_symbol"].str.replace("LINC", "") + edges["gene_symbol"] = edges["gene_symbol"].str.upper() + + self.data = self.edges = edges + lncrna2target_high_throughput_network = nx.from_pandas_edgelist(edges, + source=source_col_name, + target=target_col_name, + edge_attr=edge_attr, + create_using=nx.DiGraph() if directed else nx.Graph()) + return lncrna2target_high_throughput_network + + def load_network_low_throughput(self, file_resources, source_col_name="GENCODE_gene_name", + target_col_name="Target_official_symbol", + edge_attr=None, directed=True, filters=None): + edges = pd.read_excel(file_resources["lncRNA_target_from_low_throughput_experiments.xlsx"]) + edges = filter_rows(edges, filters) + + edges["Target_official_symbol"] = edges["Target_official_symbol"].str.replace("(?i)(mir)", "hsa-mir-", + regex=True) + edges["Target_official_symbol"] = edges["Target_official_symbol"].str.replace("--", "-") + edges["Target_official_symbol"].apply(lambda x: x.lower() if "mir" in x.lower() else x.upper()) + edges["GENCODE_gene_name"] = edges["GENCODE_gene_name"].str.upper() + + self.data = self.edges = edges + lncrna2target_low_throughput_network = nx.from_pandas_edgelist(edges, + source=source_col_name, + target=target_col_name, + edge_attr=edge_attr, + create_using=nx.DiGraph() if directed else nx.Graph()) + return lncrna2target_low_throughput_network + + +class lncRNome(Interactions, Database): + """Loads the lncRNome database from . + + Default path: . 
+
+
+class lncRNome(Interactions, Database):
+    """Loads the lncRNome database from a local directory.
+
+    Default path: local_directory .
+    Default file_resources: {
+        "miRNA_binding_sites.txt": "miRNA_binding_sites.txt",
+        "general_information.txt": "general_information.txt",
+    }
+    """
+
+    def __init__(self, path, file_resources, source_col_name='Gene Name', target_col_name='Binding miRNAs',
+                 edge_attr=["miRNA Interaction Site", "Transcript ID"], directed=True, relabel_nodes=None,
+                 **kwargs):
+        if file_resources is None:
+            file_resources = {}
+            file_resources["miRNA_binding_sites.txt"] = os.path.join(path, "miRNA_binding_sites.txt")
+            file_resources["general_information.txt"] = os.path.join(path, "general_information.txt")
+
+        super().__init__(path, file_resources=file_resources, source_col_name=source_col_name,
+                         target_col_name=target_col_name,
+                         directed=directed, relabel_nodes=relabel_nodes, edge_attr=edge_attr, **kwargs)
+
+    def load_network(self, file_resources, source_col_name, target_col_name, edge_attr, directed, filters,
+                     blocksize=None):
+        df = pd.read_table(self.file_resources["miRNA_binding_sites.txt"], header=0)
+        print(self.name(), df.columns.tolist())
+
+        df['Binding miRNAs'] = df['Binding miRNAs'].str.lower()
+        df['Binding miRNAs'] = df['Binding miRNAs'].str.replace("-3p.*|-5p.*", "", regex=True)
+
+        lncRNome_miRNA_binding_sites_network = nx.from_pandas_edgelist(df, source=source_col_name,
+                                                                       target=target_col_name,
+                                                                       edge_attr=edge_attr,
+                                                                       create_using=nx.DiGraph())
+
+        return lncRNome_miRNA_binding_sites_network
+
+    def load_dataframe(self, file_resources, blocksize=None):
+        return pd.read_table(self.file_resources["general_information.txt"], header=0,
+                             usecols=["Gene Name", "Transcript Name", "Transcript Type", "Location", "Strand"])
+
+
+class NPInter(Interactions):
+    """Loads the NPInter database from http://bigdata.ibp.ac.cn/npinter4/ .
+
+    Default path: "http://bigdata.ibp.ac.cn/npinter4/download/" .
+    Default file_resources: {
+        "interaction_NPInterv4.expr.txt": "file/interaction_NPInterv4.expr.txt.gz",
+    }
+    """
+
+    def __init__(self, path="http://bigdata.ibp.ac.cn/npinter4/download/", file_resources=None,
+                 source_col_name='ncName', target_col_name='tarName',
+                 edge_attr=["tarType", "tissueOrCell", "tag", 'class', "level"],
+                 filters=None,
+                 directed=True, relabel_nodes=None, verbose=False):
+        if file_resources is None:
+            file_resources = {}
+            file_resources["interaction_NPInterv4.expr.txt.gz"] = \
+                os.path.join(path, "file/interaction_NPInterv4.expr.txt.gz")
+
+        super().__init__(path=path, file_resources=file_resources, source_col_name=source_col_name,
+                         target_col_name=target_col_name, edge_attr=edge_attr, filters=filters, directed=directed,
+                         relabel_nodes=relabel_nodes, verbose=verbose)
+
+    def load_dataframe(self, file_resources: Dict[str, str], blocksize: int = None) -> pd.DataFrame:
+        df = pd.read_table(file_resources["interaction_NPInterv4.expr.txt"], header=0, na_values=["-"])
+        print(self.name(), df.columns.tolist())
+        df["ncName"] = df["ncName"].str.upper()
+        # Remove the "LNCRNA-" prefix
+        df["ncName"] = df["ncName"].str.replace("^LNCRNA-", "", regex=True)
+        df["ncName"] = df["ncName"].str.replace("MALAT-1", "MALAT1")
+        df["ncName"] = df["ncName"].str.replace("^MIR-", "hsa-mir-", regex=True)
+        df["ncName"] = df["ncName"].str.replace("^MICRORNA-", "hsa-mir-", regex=True)
+
+        df["tarName"] = df["tarName"].str.upper()
+
+        return df
+
+    def load_network(self, file_resources, source_col_name, target_col_name, edge_attr, directed, filters,
+                     blocksize=None):
+        df = self.data
+        df = filter_rows(df, filters)
+
+        npinter_network = nx.from_pandas_edgelist(df, source=source_col_name,
+                                                  target=target_col_name,
+                                                  edge_attr=edge_attr,
+                                                  create_using=nx.DiGraph() if directed else nx.Graph())
+
+        return npinter_network
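+
+# Illustrative usage sketch for the NPInter loader above (assumes the NPInterv4 download URL is
+# reachable); the 'organism' filter key assumes such a column exists in the NPInter table.
+#
+#     npinter = NPInter(filters={"organism": "Homo sapiens"})
+#     ncrna_edges = npinter.get_interactions(data=True)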
+
+
+class StarBase(Interactions):
+    """Loads the StarBase database from a local directory.
+
+    Default path: local_directory .
+    Default file_resources: {
+        "starbase_3.0_lncrna_rna_interactions.csv": "starbase_3.0_lncrna_rna_interactions.csv",
+    }
+    """
+
+    def __init__(self, path, file_resources, source_col_name="geneName", target_col_name="pairGeneName",
+                 min_interactionNum=1, min_expNum=1,
+                 edge_attr=None, directed=True, relabel_nodes=None, **kwargs):
+        if file_resources is None:
+            file_resources = {}
+            file_resources["starbase_3.0_lncrna_rna_interactions.csv"] = \
+                os.path.join(path, "starbase_3.0_lncrna_rna_interactions.csv")
+        self.min_interactionNum = min_interactionNum
+        self.min_expNum = min_expNum
+        super().__init__(path, file_resources, source_col_name=source_col_name, target_col_name=target_col_name,
+                         directed=directed, relabel_nodes=relabel_nodes, edge_attr=edge_attr, **kwargs)
+
+    def load_network(self, file_resources, source_col_name, target_col_name, edge_attr, directed, filters,
+                     blocksize=None):
+        df = pd.read_csv(self.file_resources["starbase_3.0_lncrna_rna_interactions.csv"], header=0)
+
+        df.loc[df["pairGeneType"] == "miRNA", "pairGeneName"] = df[df["pairGeneType"] == "miRNA"][
+            "pairGeneName"].str.lower()
+        df.loc[df["pairGeneType"] == "miRNA", "pairGeneName"] = df[df["pairGeneType"] == "miRNA"][
+            "pairGeneName"].str.replace("-3p.*|-5p.*", "", regex=True)
+        df = df[df["interactionNum"] >= self.min_interactionNum]
+        df = df[df["expNum"] >= self.min_expNum]
+
+        self.starBase_RNA_RNA_network = nx.from_pandas_edgelist(df, source=source_col_name, target=target_col_name,
+                                                                edge_attr=["interactionNum"],
+                                                                create_using=nx.DiGraph())
+        return self.starBase_RNA_RNA_network
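+
+# Illustrative usage sketch for the StarBase loader above; the path is a hypothetical local folder
+# containing the downloaded CSV, and the thresholds drop weakly supported edges.
+#
+#     starbase = StarBase(path="/path/to/starbase/", file_resources=None,
+#                         min_interactionNum=2, min_expNum=2)
+#     rna_rna_edges = starbase.get_interactions(data=True)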